From: David Hildenbrand <david@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand <david@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
Ryan Roberts <ryan.roberts@arm.com>,
Matthew Wilcox <willy@infradead.org>,
Hugh Dickins <hughd@google.com>,
Yin Fengwei <fengwei.yin@intel.com>,
Yang Shi <shy828301@gmail.com>, Ying Huang <ying.huang@intel.com>,
Zi Yan <ziy@nvidia.com>, Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>, Will Deacon <will@kernel.org>,
Waiman Long <longman@redhat.com>,
"Paul E. McKenney" <paulmck@kernel.org>
Subject: [PATCH WIP v1 18/20] atomic_seqcount: use atomic add-return instead of atomic cmpxchg on 64bit
Date: Fri, 24 Nov 2023 14:26:23 +0100
Message-ID: <20231124132626.235350-19-david@redhat.com>
In-Reply-To: <20231124132626.235350-1-david@redhat.com>

Turns out that it can be beneficial on some HW to use an atomic add-return
instead of an atomic cmpxchg. However, we have to deal with more possible
races now: in the worst case, each and every CPU might try becoming the
exclusive writer at the same time, so we need the same number of bits as
for the shared writer case.

In case we detect that we didn't end up being the exclusive writer, simply
back off and convert to a shared writer.

Only implement this optimization on 64bit, where we can steal more bits
from the actual sequence without sorrow.
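
To make the race handling easier to follow, here is a rough sketch of the
new 64bit acquisition path (identifiers shortened for readability only,
dropping the ATOMIC_SEQCOUNT_ prefix; the authoritative code is in the
diff below). The bit layout implied by the new masks is: bits 0-15 count
shared writers, bits 16-31 count exclusive writers, bits 32-63 hold the
actual sequence.

	/* Optimistically assume we will be the only writer. */
	seq = atomic_long_add_return(EXCLUSIVE_WRITER, &s->sequence);
	if ((seq & WRITERS_MASK) == EXCLUSIVE_WRITER)
		return true;	/* nobody else: we are the exclusive writer */

	/* Raced with another writer: convert our increment into a shared one. */
	atomic_long_add(SHARED_WRITER - EXCLUSIVE_WRITER, &s->sequence);
	while (atomic_long_read(&s->sequence) & EXCLUSIVE_WRITERS_MASK)
		cpu_relax();	/* wait until any exclusive writer is done */
	return false;		/* proceed as a shared writer */
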
Signed-off-by: David Hildenbrand <david@redhat.com>
---
include/linux/atomic_seqcount.h | 43 +++++++++++++++++++++++++++------
1 file changed, 36 insertions(+), 7 deletions(-)
diff --git a/include/linux/atomic_seqcount.h b/include/linux/atomic_seqcount.h
index 00286a9da221..9cd40903863d 100644
--- a/include/linux/atomic_seqcount.h
+++ b/include/linux/atomic_seqcount.h
@@ -42,9 +42,10 @@ typedef struct raw_atomic_seqcount {
#define ATOMIC_SEQCOUNT_SHARED_WRITERS_MAX 0x0000000000008000ul
#define ATOMIC_SEQCOUNT_SHARED_WRITERS_MASK 0x000000000000fffful
#define ATOMIC_SEQCOUNT_EXCLUSIVE_WRITER 0x0000000000010000ul
-#define ATOMIC_SEQCOUNT_WRITERS_MASK 0x000000000001fffful
-/* We have 48bit for the actual sequence. */
-#define ATOMIC_SEQCOUNT_SEQUENCE_STEP 0x0000000000020000ul
+#define ATOMIC_SEQCOUNT_EXCLUSIVE_WRITERS_MASK 0x00000000ffff0000ul
+#define ATOMIC_SEQCOUNT_WRITERS_MASK 0x00000000fffffffful
+/* We have 32bit for the actual sequence. */
+#define ATOMIC_SEQCOUNT_SEQUENCE_STEP 0x0000000100000000ul
#else /* CONFIG_64BIT */
@@ -53,6 +54,7 @@ typedef struct raw_atomic_seqcount {
#define ATOMIC_SEQCOUNT_SHARED_WRITERS_MAX 0x00000040ul
#define ATOMIC_SEQCOUNT_SHARED_WRITERS_MASK 0x0000007ful
#define ATOMIC_SEQCOUNT_EXCLUSIVE_WRITER 0x00000080ul
+#define ATOMIC_SEQCOUNT_EXCLUSIVE_WRITERS_MASK 0x00000080ul
#define ATOMIC_SEQCOUNT_WRITERS_MASK 0x000000fful
/* We have 24bit for the actual sequence. */
#define ATOMIC_SEQCOUNT_SEQUENCE_STEP 0x00000100ul
@@ -144,7 +146,7 @@ static inline bool raw_read_atomic_seqcount_retry(raw_atomic_seqcount_t *s,
static inline bool raw_write_atomic_seqcount_begin(raw_atomic_seqcount_t *s,
bool try_exclusive)
{
- unsigned long seqcount, seqcount_new;
+	unsigned long __maybe_unused seqcount, seqcount_new;
BUILD_BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT));
#ifdef CONFIG_DEBUG_ATOMIC_SEQCOUNT
@@ -160,6 +162,32 @@ static inline bool raw_write_atomic_seqcount_begin(raw_atomic_seqcount_t *s,
if (unlikely(seqcount & ATOMIC_SEQCOUNT_WRITERS_MASK))
goto shared;
+#ifdef CONFIG_64BIT
+ BUILD_BUG_ON(__builtin_popcount(ATOMIC_SEQCOUNT_EXCLUSIVE_WRITERS_MASK) !=
+ __builtin_popcount(ATOMIC_SEQCOUNT_SHARED_WRITERS_MASK));
+
+ /* See comment for atomic_long_try_cmpxchg() below. */
+ seqcount = atomic_long_add_return(ATOMIC_SEQCOUNT_EXCLUSIVE_WRITER,
+ &s->sequence);
+ if (likely((seqcount & ATOMIC_SEQCOUNT_WRITERS_MASK) ==
+ ATOMIC_SEQCOUNT_EXCLUSIVE_WRITER))
+ return true;
+
+ /*
+ * Whoops, we raced with another writer. Back off, converting ourselves
+ * to a shared writer and wait for any exclusive writers.
+ */
+ atomic_long_add(ATOMIC_SEQCOUNT_SHARED_WRITER - ATOMIC_SEQCOUNT_EXCLUSIVE_WRITER,
+ &s->sequence);
+ /*
+ * No need for __smp_mb__after_atomic(): the reader side already
+ * realizes that it has to retry and the memory barrier from
+ * atomic_long_add_return() is sufficient for that.
+ */
+ while (atomic_long_read(&s->sequence) & ATOMIC_SEQCOUNT_EXCLUSIVE_WRITERS_MASK)
+ cpu_relax();
+ return false;
+#else
seqcount_new = seqcount | ATOMIC_SEQCOUNT_EXCLUSIVE_WRITER;
/*
* Store the sequence before any store in the critical section. Further,
@@ -168,6 +196,7 @@ static inline bool raw_write_atomic_seqcount_begin(raw_atomic_seqcount_t *s,
*/
if (atomic_long_try_cmpxchg(&s->sequence, &seqcount, seqcount_new))
return true;
+#endif
shared:
/*
* Indicate that there is a shared writer, and spin until the exclusive
@@ -185,10 +214,10 @@ static inline bool raw_write_atomic_seqcount_begin(raw_atomic_seqcount_t *s,
DEBUG_LOCKS_WARN_ON((seqcount & ATOMIC_SEQCOUNT_SHARED_WRITERS_MASK) >
ATOMIC_SEQCOUNT_SHARED_WRITERS_MAX);
#endif /* CONFIG_DEBUG_ATOMIC_SEQCOUNT */
- if (likely(!(seqcount & ATOMIC_SEQCOUNT_EXCLUSIVE_WRITER)))
+ if (likely(!(seqcount & ATOMIC_SEQCOUNT_EXCLUSIVE_WRITERS_MASK)))
return false;
- while (atomic_long_read(&s->sequence) & ATOMIC_SEQCOUNT_EXCLUSIVE_WRITER)
+ while (atomic_long_read(&s->sequence) & ATOMIC_SEQCOUNT_EXCLUSIVE_WRITERS_MASK)
cpu_relax();
return false;
}
@@ -209,7 +238,7 @@ static inline void raw_write_atomic_seqcount_end(raw_atomic_seqcount_t *s,
if (likely(exclusive)) {
#ifdef CONFIG_DEBUG_ATOMIC_SEQCOUNT
DEBUG_LOCKS_WARN_ON(!(atomic_long_read(&s->sequence) &
- ATOMIC_SEQCOUNT_EXCLUSIVE_WRITER));
+ ATOMIC_SEQCOUNT_EXCLUSIVE_WRITERS_MASK));
#endif /* CONFIG_DEBUG_ATOMIC_SEQCOUNT */
val -= ATOMIC_SEQCOUNT_EXCLUSIVE_WRITER;
} else {
--
2.41.0
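
For completeness, the back-off arithmetic can be checked in isolation with
a small userspace sketch (not kernel code). The EXCLUSIVE_WRITER and
WRITERS_MASK values are taken from the 64bit hunk above;
ATOMIC_SEQCOUNT_SHARED_WRITER is defined outside the shown context and is
assumed to be 1ul here, as introduced earlier in the series:

#include <assert.h>
#include <stdio.h>

#define SHARED_WRITER		0x0000000000000001ul	/* assumed value */
#define EXCLUSIVE_WRITER	0x0000000000010000ul	/* from the hunk above */
#define WRITERS_MASK		0x00000000fffffffful	/* from the hunk above */

int main(void)
{
	unsigned long seq = 0;

	/* Two CPUs race for exclusive ownership ... */
	seq += EXCLUSIVE_WRITER;
	seq += EXCLUSIVE_WRITER;

	/* ... assume both observe another writer and back off. */
	seq += SHARED_WRITER - EXCLUSIVE_WRITER;
	seq += SHARED_WRITER - EXCLUSIVE_WRITER;

	/* Net effect: exactly two shared writers, no exclusive writer left. */
	assert((seq & WRITERS_MASK) == 2 * SHARED_WRITER);
	printf("after back-off: %#lx\n", seq);
	return 0;
}

This also illustrates why the exclusive writer field needs as many bits as
the shared writer field: every CPU that loses the race temporarily holds
one EXCLUSIVE_WRITER increment before converting it.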
Thread overview: 26+ messages
2023-11-24 13:26 [PATCH WIP v1 00/20] mm: precise "mapped shared" vs. "mapped exclusively" detection for PTE-mapped THP / partially-mappable folios David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 01/20] mm/rmap: factor out adding folio range into __folio_add_rmap_range() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 02/20] mm: add a total mapcount for large folios David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 03/20] mm: convert folio_estimated_sharers() to folio_mapped_shared() and improve it David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 04/20] mm/rmap: pass dst_vma to page_try_dup_anon_rmap() and page_dup_file_rmap() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 05/20] mm/rmap: abstract total mapcount operations for partially-mappable folios David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 06/20] atomic_seqcount: new (raw) seqcount variant to support concurrent writers David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 07/20] mm/rmap_id: track if one or multiple MMs map a partially-mappable folio David Hildenbrand
2023-12-17 19:13 ` Nadav Amit
2023-12-18 14:04 ` David Hildenbrand
2023-12-18 14:34 ` Nadav Amit
2023-11-24 13:26 ` [PATCH WIP v1 08/20] mm: pass MM to folio_mapped_shared() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 09/20] mm: improve folio_mapped_shared() for partially-mappable folios using rmap IDs David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 10/20] mm/memory: COW reuse support for PTE-mapped THP with " David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 11/20] mm/rmap_id: support for 1, 2 and 3 values by manual calculation David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 12/20] mm/rmap: introduce folio_add_anon_rmap_range() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 13/20] mm/huge_memory: batch rmap operations in __split_huge_pmd_locked() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 14/20] mm/huge_memory: avoid folio_refcount() < folio_mapcount() " David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 15/20] mm/rmap_id: verify precalculated subids with CONFIG_DEBUG_VM David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 16/20] atomic_seqcount: support a single exclusive writer in the absence of other writers David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 17/20] mm/rmap_id: reduce atomic RMW operations when we are the exclusive writer David Hildenbrand
2023-11-24 13:26 ` David Hildenbrand [this message]
2023-11-24 13:26 ` [PATCH WIP v1 19/20] mm/rmap: factor out removing folio range into __folio_remove_rmap_range() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 20/20] mm/rmap: perform all mapcount operations of large folios under the rmap seqcount David Hildenbrand
2023-11-24 20:55 ` [PATCH WIP v1 00/20] mm: precise "mapped shared" vs. "mapped exclusively" detection for PTE-mapped THP / partially-mappable folios Linus Torvalds
2023-11-25 17:02 ` David Hildenbrand