* [PATCH v2 01/17] iomap: Hold state_lock over call to ifs_set_range_uptodate()
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
This is really preparation for the next patch, but it lets us call
folio_mark_uptodate() in just one place instead of two.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 5db54ca29a35..6e780ca64ce3 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -57,30 +57,32 @@ static inline bool ifs_block_is_uptodate(struct iomap_folio_state *ifs,
return test_bit(block, ifs->state);
}
-static void ifs_set_range_uptodate(struct folio *folio,
+static bool ifs_set_range_uptodate(struct folio *folio,
struct iomap_folio_state *ifs, size_t off, size_t len)
{
struct inode *inode = folio->mapping->host;
unsigned int first_blk = off >> inode->i_blkbits;
unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
unsigned int nr_blks = last_blk - first_blk + 1;
- unsigned long flags;
- spin_lock_irqsave(&ifs->state_lock, flags);
bitmap_set(ifs->state, first_blk, nr_blks);
- if (ifs_is_fully_uptodate(folio, ifs))
- folio_mark_uptodate(folio);
- spin_unlock_irqrestore(&ifs->state_lock, flags);
+ return ifs_is_fully_uptodate(folio, ifs);
}
static void iomap_set_range_uptodate(struct folio *folio, size_t off,
size_t len)
{
struct iomap_folio_state *ifs = folio->private;
+ unsigned long flags;
+ bool uptodate = true;
- if (ifs)
- ifs_set_range_uptodate(folio, ifs, off, len);
- else
+ if (ifs) {
+ spin_lock_irqsave(&ifs->state_lock, flags);
+ uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
+ spin_unlock_irqrestore(&ifs->state_lock, flags);
+ }
+
+ if (uptodate)
folio_mark_uptodate(folio);
}
--
2.40.1
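
The resulting division of labour, condensed from the diff above rather
than new code: ifs_set_range_uptodate() now only updates the bitmap under
the caller-held state_lock, and the caller marks the folio uptodate
afterwards:

	spin_lock_irqsave(&ifs->state_lock, flags);
	uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
	spin_unlock_irqrestore(&ifs->state_lock, flags);
	if (uptodate)
		folio_mark_uptodate(folio);	/* the one remaining call site */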
* [PATCH v2 02/17] iomap: Protect read_bytes_pending with the state_lock
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Perform one atomic operation (acquiring the spinlock) instead of
two (spinlock & atomic_sub) per read completion.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 37 +++++++++++++++++++++++++------------
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 6e780ca64ce3..4a996c5327ef 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -29,9 +29,9 @@ typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length);
* and I/O completions.
*/
struct iomap_folio_state {
- atomic_t read_bytes_pending;
- atomic_t write_bytes_pending;
spinlock_t state_lock;
+ unsigned int read_bytes_pending;
+ atomic_t write_bytes_pending;
/*
* Each block has two bits in this bitmap:
@@ -183,7 +183,7 @@ static void ifs_free(struct folio *folio)
if (!ifs)
return;
- WARN_ON_ONCE(atomic_read(&ifs->read_bytes_pending));
+ WARN_ON_ONCE(ifs->read_bytes_pending != 0);
WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending));
WARN_ON_ONCE(ifs_is_fully_uptodate(folio, ifs) !=
folio_test_uptodate(folio));
@@ -250,19 +250,29 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
*lenp = plen;
}
-static void iomap_finish_folio_read(struct folio *folio, size_t offset,
+static void iomap_finish_folio_read(struct folio *folio, size_t off,
size_t len, int error)
{
struct iomap_folio_state *ifs = folio->private;
+ bool uptodate = !error;
+ bool finished = true;
- if (unlikely(error)) {
- folio_clear_uptodate(folio);
- folio_set_error(folio);
- } else {
- iomap_set_range_uptodate(folio, offset, len);
+ if (ifs) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&ifs->state_lock, flags);
+ if (!error)
+ uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
+ ifs->read_bytes_pending -= len;
+ finished = !ifs->read_bytes_pending;
+ spin_unlock_irqrestore(&ifs->state_lock, flags);
}
- if (!ifs || atomic_sub_and_test(len, &ifs->read_bytes_pending))
+ if (error)
+ folio_set_error(folio);
+ if (uptodate)
+ folio_mark_uptodate(folio);
+ if (finished)
folio_unlock(folio);
}
@@ -360,8 +370,11 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
}
ctx->cur_folio_in_bio = true;
- if (ifs)
- atomic_add(plen, &ifs->read_bytes_pending);
+ if (ifs) {
+ spin_lock_irq(&ifs->state_lock);
+ ifs->read_bytes_pending += plen;
+ spin_unlock_irq(&ifs->state_lock);
+ }
sector = iomap_sector(iomap, pos);
if (!ctx->bio ||
--
2.40.1
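
To make the claimed saving concrete, here is the read-completion path
condensed from the diffs of patches 1 and 2 (a sketch, with error
handling elided):

	/* Before: two atomic operations per completion. */
	spin_lock_irqsave(&ifs->state_lock, flags);		/* atomic op 1 */
	ifs_set_range_uptodate(folio, ifs, off, len);
	spin_unlock_irqrestore(&ifs->state_lock, flags);
	if (atomic_sub_and_test(len, &ifs->read_bytes_pending))/* atomic op 2 */
		folio_unlock(folio);

	/* After: the counter is a plain integer updated under the same lock. */
	spin_lock_irqsave(&ifs->state_lock, flags);		/* the only atomic op */
	uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
	ifs->read_bytes_pending -= len;				/* plain arithmetic */
	finished = !ifs->read_bytes_pending;
	spin_unlock_irqrestore(&ifs->state_lock, flags);
	if (finished)
		folio_unlock(folio);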
* [PATCH v2 03/17] mm: Add folio_end_read()
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Provide a function for filesystems to call when they have finished
reading an entire folio.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/pagemap.h | 1 +
mm/filemap.c | 22 ++++++++++++++++++++++
2 files changed, 23 insertions(+)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 351c3b7f93a1..5bb2f5f802bc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1129,6 +1129,7 @@ static inline void wait_on_page_locked(struct page *page)
folio_wait_locked(page_folio(page));
}
+void folio_end_read(struct folio *folio, bool success);
void wait_on_page_writeback(struct page *page);
void folio_wait_writeback(struct folio *folio);
int folio_wait_writeback_killable(struct folio *folio);
diff --git a/mm/filemap.c b/mm/filemap.c
index f0a15ce1bd1b..ea317cdf9532 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1527,6 +1527,28 @@ void folio_unlock(struct folio *folio)
}
EXPORT_SYMBOL(folio_unlock);
+/**
+ * folio_end_read - End read on a folio.
+ * @folio: The folio.
+ * @success: True if all reads completed successfully.
+ *
+ * When all reads against a folio have completed, filesystems should
+ * call this function to let the pagecache know that no more reads
+ * are outstanding. This will unlock the folio and wake up any thread
+ * sleeping on the lock. The folio will also be marked uptodate if all
+ * reads succeeded.
+ *
+ * Context: May be called from interrupt or process context. May not be
+ * called from NMI context.
+ */
+void folio_end_read(struct folio *folio, bool success)
+{
+ if (likely(success))
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
+}
+EXPORT_SYMBOL(folio_end_read);
+
/**
* folio_end_private_2 - Clear PG_private_2 and wake any waiters.
* @folio: The folio.
--
2.40.1
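
A minimal caller, for illustration; this is the shape the conversions
later in this series take (compare the ext4 patch), with a hypothetical
handler name:

	static void my_read_end_io(struct bio *bio)
	{
		struct folio_iter fi;

		/* One read may span several folios; finish each of them. */
		bio_for_each_folio_all(fi, bio)
			folio_end_read(fi.folio, bio->bi_status == 0);
		bio_put(bio);
	}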
* Re: [PATCH v2 03/17] mm: Add folio_end_read()
From: Tetsuo Handa @ 2024-02-23 15:26 UTC
To: Matthew Wilcox (Oracle), Andrew Morton
Cc: linux-mm, linux-fsdevel, linux-arch, torvalds, npiggin
During a bisection of a different problem, I noticed that
commit 0b237047d5a7 ("mm: add folio_end_read()") added folio_end_read() as
----------------------------------------
+void folio_end_read(struct folio *folio, bool success)
+{
+ if (likely(success))
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
+}
----------------------------------------
but commit f8174a118122 ("ext4: use folio_end_read()") for an unknown reason
removed the folio_clear_uptodate() call when bio->bi_status != 0.
----------------------------------------
- bio_for_each_folio_all(fi, bio) {
- struct folio *folio = fi.folio;
-
- if (bio->bi_status)
- folio_clear_uptodate(folio);
- else
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- }
+ bio_for_each_folio_all(fi, bio)
+ folio_end_read(fi.folio, bio->bi_status == 0);
----------------------------------------
Why is the

	else
		folio_clear_uptodate(folio);

part missing from folio_end_read()?
* Re: [PATCH v2 03/17] mm: Add folio_end_read()
From: Matthew Wilcox @ 2024-02-23 15:36 UTC
To: Tetsuo Handa
Cc: Andrew Morton, linux-mm, linux-fsdevel, linux-arch, torvalds,
npiggin
On Sat, Feb 24, 2024 at 12:26:41AM +0900, Tetsuo Handa wrote:
> During a bisection of a different problem, I noticed that
> commit 0b237047d5a7 ("mm: add folio_end_read()") added folio_end_read() as
>
> ----------------------------------------
> +void folio_end_read(struct folio *folio, bool success)
> +{
> + if (likely(success))
> + folio_mark_uptodate(folio);
> + folio_unlock(folio);
> +}
> ----------------------------------------
>
> but commit f8174a118122 ("ext4: use folio_end_read()") for an unknown reason
> removed the folio_clear_uptodate() call when bio->bi_status != 0.
>
> ----------------------------------------
> - bio_for_each_folio_all(fi, bio) {
> - struct folio *folio = fi.folio;
> -
> - if (bio->bi_status)
> - folio_clear_uptodate(folio);
> - else
> - folio_mark_uptodate(folio);
> - folio_unlock(folio);
> - }
> + bio_for_each_folio_all(fi, bio)
> + folio_end_read(fi.folio, bio->bi_status == 0);
> ----------------------------------------
>
> Why is the
>
> 	else
> 		folio_clear_uptodate(folio);
>
> part missing from folio_end_read()?
Because the folio already has its uptodate flag clear. We don't re-read
folios which have the uptodate flag set. This was always dead code.
Now we assert it's true in folio_end_read():
VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio);
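
In other words, the read path only ever issues reads against folios whose
uptodate flag is already clear. A condensed sketch of the generic
pagecache logic (illustrative, not the exact mm/filemap.c source):

	if (folio_test_uptodate(folio))
		return 0;			/* no read is ever issued */
	folio_lock(folio);
	if (folio_test_uptodate(folio))		/* raced with another reader */
		goto unlock;
	mapping->a_ops->read_folio(file, folio);/* uptodate is still clear here */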
* [PATCH v2 04/17] ext4: Use folio_end_read()
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
folio_end_read() is the perfect fit for ext4.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ext4/readpage.c | 14 +++-----------
1 file changed, 3 insertions(+), 11 deletions(-)
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 3e7d160f543f..21e8f0aebb3c 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -70,15 +70,8 @@ static void __read_end_io(struct bio *bio)
{
struct folio_iter fi;
- bio_for_each_folio_all(fi, bio) {
- struct folio *folio = fi.folio;
-
- if (bio->bi_status)
- folio_clear_uptodate(folio);
- else
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- }
+ bio_for_each_folio_all(fi, bio)
+ folio_end_read(fi.folio, bio->bi_status == 0);
if (bio->bi_private)
mempool_free(bio->bi_private, bio_post_read_ctx_pool);
bio_put(bio);
@@ -336,8 +329,7 @@ int ext4_mpage_readpages(struct inode *inode,
if (ext4_need_verity(inode, folio->index) &&
!fsverity_verify_folio(folio))
goto set_error_page;
- folio_mark_uptodate(folio);
- folio_unlock(folio);
+ folio_end_read(folio, true);
continue;
}
} else if (fully_mapped) {
--
2.40.1
* [PATCH v2 05/17] buffer: Use folio_end_read()
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
There are two places that we can use this new helper.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/buffer.c | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 12e9a71c693d..b7bafba87273 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -282,13 +282,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
} while (tmp != bh);
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
- /*
- * If all of the buffers are uptodate then we can set the page
- * uptodate.
- */
- if (folio_uptodate)
- folio_mark_uptodate(folio);
- folio_unlock(folio);
+ folio_end_read(folio, folio_uptodate);
return;
still_busy:
@@ -2425,12 +2419,10 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
if (!nr) {
/*
- * All buffers are uptodate - we can set the folio uptodate
- * as well. But not if get_block() returned an error.
+ * All buffers are uptodate or get_block() returned an
+ * error when trying to map them - we can finish the read.
*/
- if (!page_error)
- folio_mark_uptodate(folio);
- folio_unlock(folio);
+ folio_end_read(folio, !page_error);
return 0;
}
--
2.40.1
* [PATCH v2 06/17] iomap: Use folio_end_read()
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Combine the setting of the uptodate flag with the clearing of the
locked flag.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/iomap/buffered-io.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 4a996c5327ef..5d19a2b47b6a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -270,10 +270,8 @@ static void iomap_finish_folio_read(struct folio *folio, size_t off,
if (error)
folio_set_error(folio);
- if (uptodate)
- folio_mark_uptodate(folio);
if (finished)
- folio_unlock(folio);
+ folio_end_read(folio, uptodate);
}
static void iomap_read_end_io(struct bio *bio)
--
2.40.1
* [PATCH v2 07/17] bitops: Add xor_unlock_is_negative_byte()
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Replace clear_bit_and_unlock_is_negative_byte() with
xor_unlock_is_negative_byte(). We have a few places that like to lock
a folio, set a flag and unlock it again. Allow for the possibility of
combining the latter two operations for efficiency. We are guaranteed
that the caller holds the lock, so it is safe to unlock it with the xor.
The caller must guarantee that nobody else will set the flag without
holding the lock; it is not safe to do this with the PG_dirty flag,
for example.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
arch/powerpc/include/asm/bitops.h | 17 ++++--------
arch/x86/include/asm/bitops.h | 11 ++++----
.../asm-generic/bitops/instrumented-lock.h | 27 ++++++++++---------
include/asm-generic/bitops/lock.h | 21 ++++-----------
kernel/kcsan/kcsan_test.c | 8 +++---
kernel/kcsan/selftest.c | 8 +++---
mm/filemap.c | 5 ++++
mm/kasan/kasan_test.c | 7 ++---
8 files changed, 47 insertions(+), 57 deletions(-)
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 7e0f0322912b..40cc3ded60cb 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -234,32 +234,25 @@ static inline int arch_test_and_change_bit(unsigned long nr,
}
#ifdef CONFIG_PPC64
-static inline unsigned long
-clear_bit_unlock_return_word(int nr, volatile unsigned long *addr)
+static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *p)
{
unsigned long old, t;
- unsigned long *p = (unsigned long *)addr + BIT_WORD(nr);
- unsigned long mask = BIT_MASK(nr);
__asm__ __volatile__ (
PPC_RELEASE_BARRIER
"1:" PPC_LLARX "%0,0,%3,0\n"
- "andc %1,%0,%2\n"
+ "xor %1,%0,%2\n"
PPC_STLCX "%1,0,%3\n"
"bne- 1b\n"
: "=&r" (old), "=&r" (t)
: "r" (mask), "r" (p)
: "cc", "memory");
- return old;
+ return (old & BIT_MASK(7)) != 0;
}
-/*
- * This is a special function for mm/filemap.c
- * Bit 7 corresponds to PG_waiters.
- */
-#define arch_clear_bit_unlock_is_negative_byte(nr, addr) \
- (clear_bit_unlock_return_word(nr, addr) & BIT_MASK(7))
+#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
#endif /* CONFIG_PPC64 */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 2edf68475fec..f03c0a50ec3a 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -94,18 +94,17 @@ arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
-static __always_inline bool
-arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *addr)
{
bool negative;
- asm volatile(LOCK_PREFIX "andb %2,%1"
+ asm volatile(LOCK_PREFIX "xorb %2,%1"
CC_SET(s)
: CC_OUT(s) (negative), WBYTE_ADDR(addr)
- : "ir" ((char) ~(1 << nr)) : "memory");
+ : "iq" ((char)mask) : "memory");
return negative;
}
-#define arch_clear_bit_unlock_is_negative_byte \
- arch_clear_bit_unlock_is_negative_byte
+#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
static __always_inline void
arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
diff --git a/include/asm-generic/bitops/instrumented-lock.h b/include/asm-generic/bitops/instrumented-lock.h
index eb64bd4f11f3..e8ea3aeda9a9 100644
--- a/include/asm-generic/bitops/instrumented-lock.h
+++ b/include/asm-generic/bitops/instrumented-lock.h
@@ -58,27 +58,30 @@ static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
return arch_test_and_set_bit_lock(nr, addr);
}
-#if defined(arch_clear_bit_unlock_is_negative_byte)
+#if defined(arch_xor_unlock_is_negative_byte)
/**
- * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom
- * byte is negative, for unlock.
- * @nr: the bit to clear
- * @addr: the address to start counting from
+ * xor_unlock_is_negative_byte - XOR a single byte in memory and test if
+ * it is negative, for unlock.
+ * @mask: Change the bits which are set in this mask.
+ * @addr: The address of the word containing the byte to change.
*
+ * Changes some of bits 0-6 in the word pointed to by @addr.
* This operation is atomic and provides release barrier semantics.
+ * Used to optimise some folio operations which are commonly paired
+ * with an unlock or end of writeback. Bit 7 is used as PG_waiters to
+ * indicate whether anybody is waiting for the unlock.
*
- * This is a bit of a one-trick-pony for the filemap code, which clears
- * PG_locked and tests PG_waiters,
+ * Return: Whether the top bit of the byte is set.
*/
-static inline bool
-clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+static inline bool xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *addr)
{
kcsan_release();
- instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
- return arch_clear_bit_unlock_is_negative_byte(nr, addr);
+ instrument_atomic_write(addr, sizeof(long));
+ return arch_xor_unlock_is_negative_byte(mask, addr);
}
/* Let everybody know we have it. */
-#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
+#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
#endif
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H */
diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index 40913516e654..6a638e89d130 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h
@@ -66,27 +66,16 @@ arch___clear_bit_unlock(unsigned int nr, volatile unsigned long *p)
raw_atomic_long_set_release((atomic_long_t *)p, old);
}
-/**
- * arch_clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom
- * byte is negative, for unlock.
- * @nr: the bit to clear
- * @addr: the address to start counting from
- *
- * This is a bit of a one-trick-pony for the filemap code, which clears
- * PG_locked and tests PG_waiters,
- */
-#ifndef arch_clear_bit_unlock_is_negative_byte
-static inline bool arch_clear_bit_unlock_is_negative_byte(unsigned int nr,
- volatile unsigned long *p)
+#ifndef arch_xor_unlock_is_negative_byte
+static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *p)
{
long old;
- unsigned long mask = BIT_MASK(nr);
- p += BIT_WORD(nr);
- old = raw_atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p);
+ old = raw_atomic_long_fetch_xor_release(mask, (atomic_long_t *)p);
return !!(old & BIT(7));
}
-#define arch_clear_bit_unlock_is_negative_byte arch_clear_bit_unlock_is_negative_byte
+#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
#endif
#include <asm-generic/bitops/instrumented-lock.h>
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
index 0ddbdab5903d..1333d23ac4ef 100644
--- a/kernel/kcsan/kcsan_test.c
+++ b/kernel/kcsan/kcsan_test.c
@@ -700,10 +700,10 @@ static void test_barrier_nothreads(struct kunit *test)
KCSAN_EXPECT_RW_BARRIER(mutex_lock(&test_mutex), false);
KCSAN_EXPECT_RW_BARRIER(mutex_unlock(&test_mutex), true);
-#ifdef clear_bit_unlock_is_negative_byte
- KCSAN_EXPECT_READ_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true);
- KCSAN_EXPECT_WRITE_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true);
- KCSAN_EXPECT_RW_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true);
+#ifdef xor_unlock_is_negative_byte
+ KCSAN_EXPECT_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true);
+ KCSAN_EXPECT_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true);
+ KCSAN_EXPECT_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true);
#endif
kcsan_nestable_atomic_end();
}
diff --git a/kernel/kcsan/selftest.c b/kernel/kcsan/selftest.c
index 8679322450f2..619be7417420 100644
--- a/kernel/kcsan/selftest.c
+++ b/kernel/kcsan/selftest.c
@@ -228,10 +228,10 @@ static bool __init test_barrier(void)
spin_lock(&test_spinlock);
KCSAN_CHECK_RW_BARRIER(spin_unlock(&test_spinlock));
-#ifdef clear_bit_unlock_is_negative_byte
- KCSAN_CHECK_RW_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var));
- KCSAN_CHECK_READ_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var));
- KCSAN_CHECK_WRITE_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var));
+#ifdef xor_unlock_is_negative_byte
+ KCSAN_CHECK_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var));
+ KCSAN_CHECK_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var));
+ KCSAN_CHECK_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var));
#endif
kcsan_nestable_atomic_end();
diff --git a/mm/filemap.c b/mm/filemap.c
index ea317cdf9532..7da465616317 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1484,6 +1484,11 @@ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter)
}
EXPORT_SYMBOL_GPL(folio_add_wait_queue);
+#ifdef xor_unlock_is_negative_byte
+#define clear_bit_unlock_is_negative_byte(nr, p) \
+ xor_unlock_is_negative_byte(1 << nr, p)
+#endif
+
#ifndef clear_bit_unlock_is_negative_byte
/*
diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
index b61cc6a42541..c1de091dfc92 100644
--- a/mm/kasan/kasan_test.c
+++ b/mm/kasan/kasan_test.c
@@ -1098,9 +1098,10 @@ static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr)
KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr));
KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr));
-#if defined(clear_bit_unlock_is_negative_byte)
- KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result =
- clear_bit_unlock_is_negative_byte(nr, addr));
+#if defined(xor_unlock_is_negative_byte)
+ if (nr < 7)
+ KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result =
+ xor_unlock_is_negative_byte(1 << nr, addr));
#endif
}
--
2.40.1
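
The intended caller pattern is what folio_unlock() becomes in patch 14 of
this series: PG_locked is known to be set, so the XOR clears it, and the
sign bit of the resulting byte reports PG_waiters:

	void folio_unlock(struct folio *folio)
	{
		/* Bit 7 is PG_waiters; PG_locked lives in the same byte. */
		BUILD_BUG_ON(PG_waiters != 7);
		BUILD_BUG_ON(PG_locked > 7);
		VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
		if (xor_unlock_is_negative_byte(1 << PG_locked,
						folio_flags(folio, 0)))
			folio_wake_bit(folio, PG_locked);
	}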
* [PATCH v2 08/17] alpha: Implement xor_unlock_is_negative_byte
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Inspired by the alpha clear_bit() and arch_atomic_add_return(), this
will surely be more efficient than the generic one defined in filemap.c.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
arch/alpha/include/asm/bitops.h | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h
index bafb1c1f0fdc..b50ad6b83e85 100644
--- a/arch/alpha/include/asm/bitops.h
+++ b/arch/alpha/include/asm/bitops.h
@@ -286,6 +286,27 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
#define arch_test_bit generic_test_bit
#define arch_test_bit_acquire generic_test_bit_acquire
+static inline bool xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *p)
+{
+ unsigned long temp, old;
+
+ __asm__ __volatile__(
+ "1: ldl_l %0,%4\n"
+ " mov %0,%2\n"
+ " xor %0,%3,%0\n"
+ " stl_c %0,%1\n"
+ " beq %0,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ :"=&r" (temp), "=m" (*p), "=&r" (old)
+ :"Ir" (mask), "m" (*p));
+
+ return (old & BIT(7)) != 0;
+}
+#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
+
/*
* ffz = Find First Zero in word. Undefined if no zero exists,
* so code should check against ~0UL first..
--
2.40.1
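
For readers who don't speak Alpha assembly, a C-level equivalent of the
LL/SC loop above (store_conditional() is a made-up name standing in for
the stl_c instruction, which provides the atomicity):

	unsigned long old, new;

	do {
		old = *p;			/* ldl_l: load-locked */
		new = old ^ mask;		/* xor */
	} while (!store_conditional(p, new));	/* stl_c: retry if we raced */
	return (old & BIT(7)) != 0;		/* bit 7 of the low byte */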
* [PATCH v2 09/17] m68k: Implement xor_unlock_is_negative_byte
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Using EOR to clear the guaranteed-to-be-set lock bit will test the
negative flag just like the x86 implementation. This should be
more efficient than the generic implementation in filemap.c. It
would be better if m68k had __GCC_ASM_FLAG_OUTPUTS__.
Coldfire doesn't have a byte-sized EOR, so we test bit 7 after the
EOR, which is a second memory access, but it's slightly better than
the current C code.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
arch/m68k/include/asm/bitops.h | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index e984af71df6b..80ee36095905 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -319,6 +319,28 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
return test_and_change_bit(nr, addr);
}
+static inline bool xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *p)
+{
+#ifdef CONFIG_COLDFIRE
+ __asm__ __volatile__ ("eorl %1, %0"
+ : "+m" (*p)
+ : "d" (mask)
+ : "memory");
+ return *p & (1 << 7);
+#else
+ char result;
+ char *cp = (char *)p + 3; /* m68k is big-endian */
+
+ __asm__ __volatile__ ("eor.b %1, %2; smi %0"
+ : "=d" (result)
+ : "di" (mask), "o" (*cp)
+ : "memory");
+ return result;
+#endif
+}
+#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
+
/*
* The true 68020 and more advanced processors support the "bfffo"
* instruction for finding bits. ColdFire and simple 68000 parts
--
2.40.1
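
The "(char *)p + 3" deserves a note: m68k is 32-bit big-endian, so the
byte holding bits 0-7 of the unsigned long, the byte containing PG_locked
and PG_waiters, is the last of its four bytes. A quick illustration
(assuming a 32-bit big-endian long):

	unsigned long word = 1UL << 7;		/* e.g. PG_waiters set */
	unsigned char *bytes = (unsigned char *)&word;
	/* big-endian layout: bytes[0..2] == 0x00, bytes[3] == 0x80 */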
* Re: [PATCH v2 09/17] m68k: Implement xor_unlock_is_negative_byte
From: Greg Ungerer @ 2023-10-04 23:49 UTC
To: Matthew Wilcox (Oracle), Andrew Morton
Cc: linux-mm, linux-fsdevel, linux-arch, torvalds, npiggin
On 5/10/23 02:53, Matthew Wilcox (Oracle) wrote:
> Using EOR to clear the guaranteed-to-be-set lock bit will test the
> negative flag just like the x86 implementation. This should be
> more efficient than the generic implementation in filemap.c. It
> would be better if m68k had __GCC_ASM_FLAG_OUTPUTS__.
>
> Coldfire doesn't have a byte-sized EOR, so we test bit 7 after the
> EOR, which is a second memory access, but it's slightly better than
> the current C code.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Greg Ungerer <gerg@linux-m68k.org>
> ---
> arch/m68k/include/asm/bitops.h | 22 ++++++++++++++++++++++
> 1 file changed, 22 insertions(+)
>
> diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
> index e984af71df6b..80ee36095905 100644
> --- a/arch/m68k/include/asm/bitops.h
> +++ b/arch/m68k/include/asm/bitops.h
> @@ -319,6 +319,28 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
> return test_and_change_bit(nr, addr);
> }
>
> +static inline bool xor_unlock_is_negative_byte(unsigned long mask,
> + volatile unsigned long *p)
> +{
> +#ifdef CONFIG_COLDFIRE
> + __asm__ __volatile__ ("eorl %1, %0"
> + : "+m" (*p)
> + : "d" (mask)
> + : "memory");
> + return *p & (1 << 7);
> +#else
> + char result;
> + char *cp = (char *)p + 3; /* m68k is big-endian */
> +
> + __asm__ __volatile__ ("eor.b %1, %2; smi %0"
> + : "=d" (result)
> + : "di" (mask), "o" (*cp)
> + : "memory");
> + return result;
> +#endif
> +}
> +#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
> +
> /*
> * The true 68020 and more advanced processors support the "bfffo"
> * instruction for finding bits. ColdFire and simple 68000 parts
* Re: [PATCH v2 09/17] m68k: Implement xor_unlock_is_negative_byte
From: Geert Uytterhoeven @ 2023-10-05 8:11 UTC
To: Matthew Wilcox (Oracle)
Cc: Andrew Morton, linux-mm, linux-fsdevel, linux-arch, torvalds,
npiggin
On Wed, Oct 4, 2023 at 6:53 PM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
> Using EOR to clear the guaranteed-to-be-set lock bit will test the
> negative flag just like the x86 implementation. This should be
> more efficient than the generic implementation in filemap.c. It
> would be better if m68k had __GCC_ASM_FLAG_OUTPUTS__.
>
> Coldfire doesn't have a byte-sized EOR, so we test bit 7 after the
> EOR, which is a second memory access, but it's slightly better than
> the current C code.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
* [PATCH v2 10/17] mips: Implement xor_unlock_is_negative_byte
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Inspired by the mips test_and_change_bit(), this will surely be more
efficient than the generic one defined in filemap.c.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
arch/mips/include/asm/bitops.h | 26 +++++++++++++++++++++++++-
arch/mips/lib/bitops.c | 14 ++++++++++++++
2 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index b4bf754f7db3..d98a05c478f4 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -73,7 +73,8 @@ int __mips_test_and_clear_bit(unsigned long nr,
volatile unsigned long *addr);
int __mips_test_and_change_bit(unsigned long nr,
volatile unsigned long *addr);
-
+bool __mips_xor_is_negative_byte(unsigned long mask,
+ volatile unsigned long *addr);
/*
* set_bit - Atomically set a bit in memory
@@ -279,6 +280,29 @@ static inline int test_and_change_bit(unsigned long nr,
return res;
}
+static inline bool xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *p)
+{
+ unsigned long orig;
+ bool res;
+
+ smp_mb__before_atomic();
+
+ if (!kernel_uses_llsc) {
+ res = __mips_xor_is_negative_byte(mask, p);
+ } else {
+ orig = __test_bit_op(*p, "%0",
+ "xor\t%1, %0, %3",
+ "ir"(mask));
+ res = (orig & BIT(7)) != 0;
+ }
+
+ smp_llsc_mb();
+
+ return res;
+}
+#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
+
#undef __bit_op
#undef __test_bit_op
diff --git a/arch/mips/lib/bitops.c b/arch/mips/lib/bitops.c
index 116d0bd8b2ae..00aee98e9d54 100644
--- a/arch/mips/lib/bitops.c
+++ b/arch/mips/lib/bitops.c
@@ -146,3 +146,17 @@ int __mips_test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
return res;
}
EXPORT_SYMBOL(__mips_test_and_change_bit);
+
+bool __mips_xor_is_negative_byte(unsigned long mask,
+ volatile unsigned long *addr)
+{
+ unsigned long flags;
+ unsigned long data;
+
+ raw_local_irq_save(flags);
+ data = *addr;
+ *addr = data ^ mask;
+ raw_local_irq_restore(flags);
+
+ return (data & BIT(7)) != 0;
+}
--
2.40.1
* [PATCH v2 11/17] powerpc: Implement arch_xor_unlock_is_negative_byte on 32-bit
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Simply remove the ifdef. The assembly is identical to that in the
non-optimised case of test_and_clear_bits() on PPC32, and it's not clear
to me how the PPC32 optimisation works, nor whether it would work for
arch_xor_unlock_is_negative_byte(). If that optimisation would work,
someone can implement it later, but this is more efficient than the
implementation in filemap.c.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
arch/powerpc/include/asm/bitops.h | 4 ----
1 file changed, 4 deletions(-)
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 40cc3ded60cb..671ecc6711e3 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -233,7 +233,6 @@ static inline int arch_test_and_change_bit(unsigned long nr,
return test_and_change_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}
-#ifdef CONFIG_PPC64
static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
volatile unsigned long *p)
{
@@ -251,11 +250,8 @@ static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
return (old & BIT_MASK(7)) != 0;
}
-
#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
-#endif /* CONFIG_PPC64 */
-
#include <asm-generic/bitops/non-atomic.h>
static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr)
--
2.40.1
* [PATCH v2 12/17] riscv: Implement xor_unlock_is_negative_byte
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Inspired by the riscv clear_bit_unlock(), this will surely be
more efficient than the generic one defined in filemap.c.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
arch/riscv/include/asm/bitops.h | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 3540b690944b..15e3044298a2 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -191,6 +191,19 @@ static inline void __clear_bit_unlock(
clear_bit_unlock(nr, addr);
}
+static inline bool xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *addr)
+{
+ unsigned long res;
+ __asm__ __volatile__ (
+ __AMO(xor) ".rl %0, %2, %1"
+ : "=r" (res), "+A" (*addr)
+ : "r" (__NOP(mask))
+ : "memory");
+ return (res & BIT(7)) != 0;
+}
+#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
+
#undef __test_and_op_bit
#undef __op_bit
#undef __NOP
--
2.40.1
* [PATCH v2 13/17] s390: Implement arch_xor_unlock_is_negative_byte
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Inspired by the s390 arch_test_and_clear_bit(), this will surely be
more efficient than the generic one defined in filemap.c.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
arch/s390/include/asm/bitops.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 2de74fcd0578..c467dffa8c12 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -201,6 +201,16 @@ static inline void arch___clear_bit_unlock(unsigned long nr,
arch___clear_bit(nr, ptr);
}
+static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *ptr)
+{
+ unsigned long old;
+
+ old = __atomic64_xor_barrier(mask, (long *)ptr);
+ return old & BIT(7);
+}
+#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
+
#include <asm-generic/bitops/instrumented-atomic.h>
#include <asm-generic/bitops/instrumented-non-atomic.h>
#include <asm-generic/bitops/instrumented-lock.h>
--
2.40.1
* [PATCH v2 14/17] mm: Delete checks for xor_unlock_is_negative_byte()
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Architectures which don't define their own use the one in
asm-generic/bitops/lock.h. Get rid of all the ifdefs around "maybe we
don't have it".
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
arch/alpha/include/asm/bitops.h | 1 -
arch/m68k/include/asm/bitops.h | 1 -
arch/mips/include/asm/bitops.h | 1 -
arch/riscv/include/asm/bitops.h | 1 -
.../asm-generic/bitops/instrumented-lock.h | 5 ----
include/asm-generic/bitops/lock.h | 1 -
kernel/kcsan/kcsan_test.c | 3 --
kernel/kcsan/selftest.c | 3 --
mm/filemap.c | 30 +------------------
mm/kasan/kasan_test.c | 3 --
10 files changed, 1 insertion(+), 48 deletions(-)
diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h
index b50ad6b83e85..3e33621922c3 100644
--- a/arch/alpha/include/asm/bitops.h
+++ b/arch/alpha/include/asm/bitops.h
@@ -305,7 +305,6 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask,
return (old & BIT(7)) != 0;
}
-#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
/*
* ffz = Find First Zero in word. Undefined if no zero exists,
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index 80ee36095905..14c64a6f1217 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -339,7 +339,6 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask,
return result;
#endif
}
-#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
/*
* The true 68020 and more advanced processors support the "bfffo"
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index d98a05c478f4..89f73d1a4ea4 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -301,7 +301,6 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask,
return res;
}
-#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
#undef __bit_op
#undef __test_bit_op
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 15e3044298a2..65f6eee4ab8d 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -202,7 +202,6 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask,
: "memory");
return (res & BIT(7)) != 0;
}
-#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
#undef __test_and_op_bit
#undef __op_bit
diff --git a/include/asm-generic/bitops/instrumented-lock.h b/include/asm-generic/bitops/instrumented-lock.h
index e8ea3aeda9a9..542d3727ee4e 100644
--- a/include/asm-generic/bitops/instrumented-lock.h
+++ b/include/asm-generic/bitops/instrumented-lock.h
@@ -58,7 +58,6 @@ static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
return arch_test_and_set_bit_lock(nr, addr);
}
-#if defined(arch_xor_unlock_is_negative_byte)
/**
* xor_unlock_is_negative_byte - XOR a single byte in memory and test if
* it is negative, for unlock.
@@ -80,8 +79,4 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask,
instrument_atomic_write(addr, sizeof(long));
return arch_xor_unlock_is_negative_byte(mask, addr);
}
-/* Let everybody know we have it. */
-#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte
-#endif
-
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H */
diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index 6a638e89d130..14d4ec8c5152 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h
@@ -75,7 +75,6 @@ static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
old = raw_atomic_long_fetch_xor_release(mask, (atomic_long_t *)p);
return !!(old & BIT(7));
}
-#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
#endif
#include <asm-generic/bitops/instrumented-lock.h>
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
index 1333d23ac4ef..015586217875 100644
--- a/kernel/kcsan/kcsan_test.c
+++ b/kernel/kcsan/kcsan_test.c
@@ -699,12 +699,9 @@ static void test_barrier_nothreads(struct kunit *test)
KCSAN_EXPECT_RW_BARRIER(spin_unlock(&test_spinlock), true);
KCSAN_EXPECT_RW_BARRIER(mutex_lock(&test_mutex), false);
KCSAN_EXPECT_RW_BARRIER(mutex_unlock(&test_mutex), true);
-
-#ifdef xor_unlock_is_negative_byte
KCSAN_EXPECT_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true);
KCSAN_EXPECT_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true);
KCSAN_EXPECT_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true);
-#endif
kcsan_nestable_atomic_end();
}
diff --git a/kernel/kcsan/selftest.c b/kernel/kcsan/selftest.c
index 619be7417420..84a1200271af 100644
--- a/kernel/kcsan/selftest.c
+++ b/kernel/kcsan/selftest.c
@@ -227,12 +227,9 @@ static bool __init test_barrier(void)
KCSAN_CHECK_RW_BARRIER(arch_spin_unlock(&arch_spinlock));
spin_lock(&test_spinlock);
KCSAN_CHECK_RW_BARRIER(spin_unlock(&test_spinlock));
-
-#ifdef xor_unlock_is_negative_byte
KCSAN_CHECK_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var));
KCSAN_CHECK_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var));
KCSAN_CHECK_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var));
-#endif
kcsan_nestable_atomic_end();
return ret;
diff --git a/mm/filemap.c b/mm/filemap.c
index 7da465616317..ab8f798eb0af 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1484,34 +1484,6 @@ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter)
}
EXPORT_SYMBOL_GPL(folio_add_wait_queue);
-#ifdef xor_unlock_is_negative_byte
-#define clear_bit_unlock_is_negative_byte(nr, p) \
- xor_unlock_is_negative_byte(1 << nr, p)
-#endif
-
-#ifndef clear_bit_unlock_is_negative_byte
-
-/*
- * PG_waiters is the high bit in the same byte as PG_lock.
- *
- * On x86 (and on many other architectures), we can clear PG_lock and
- * test the sign bit at the same time. But if the architecture does
- * not support that special operation, we just do this all by hand
- * instead.
- *
- * The read of PG_waiters has to be after (or concurrently with) PG_locked
- * being cleared, but a memory barrier should be unnecessary since it is
- * in the same byte as PG_locked.
- */
-static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
-{
- clear_bit_unlock(nr, mem);
- /* smp_mb__after_atomic(); */
- return test_bit(PG_waiters, mem);
-}
-
-#endif
-
/**
* folio_unlock - Unlock a locked folio.
* @folio: The folio.
@@ -1527,7 +1499,7 @@ void folio_unlock(struct folio *folio)
BUILD_BUG_ON(PG_waiters != 7);
BUILD_BUG_ON(PG_locked > 7);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0)))
+ if (xor_unlock_is_negative_byte(1 << PG_locked, folio_flags(folio, 0)))
folio_wake_bit(folio, PG_locked);
}
EXPORT_SYMBOL(folio_unlock);
diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
index c1de091dfc92..b241b57427b1 100644
--- a/mm/kasan/kasan_test.c
+++ b/mm/kasan/kasan_test.c
@@ -1097,12 +1097,9 @@ static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr)
KUNIT_EXPECT_KASAN_FAIL(test, test_and_change_bit(nr, addr));
KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr));
KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr));
-
-#if defined(xor_unlock_is_negative_byte)
if (nr < 7)
KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result =
xor_unlock_is_negative_byte(1 << nr, addr));
-#endif
}
static void kasan_bitops_generic(struct kunit *test)
--
2.40.1
* Re: [PATCH v2 14/17] mm: Delete checks for xor_unlock_is_negative_byte()
From: Geert Uytterhoeven @ 2023-10-05 8:12 UTC
To: Matthew Wilcox (Oracle)
Cc: Andrew Morton, linux-mm, linux-fsdevel, linux-arch, torvalds,
npiggin
On Wed, Oct 4, 2023 at 6:53 PM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
> Architectures which don't define their own use the one in
> asm-generic/bitops/lock.h. Get rid of all the ifdefs around "maybe we
> don't have it".
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> arch/m68k/include/asm/bitops.h | 1 -
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
* [PATCH v2 15/17] mm: Add folio_xor_flags_has_waiters()
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Optimise folio_end_read() by setting the uptodate bit at the same time
we clear the lock bit. This saves at least one memory barrier and
one write-after-write hazard.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/page-flags.h | 19 +++++++++++++++++++
mm/filemap.c | 14 +++++++++++---
2 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5c02720c53a5..a88e64acebfe 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -692,6 +692,25 @@ TESTPAGEFLAG_FALSE(Ksm, ksm)
u64 stable_page_flags(struct page *page);
+/**
+ * folio_xor_flags_has_waiters - Change some folio flags.
+ * @folio: The folio.
+ * @mask: Bits set in this word will be changed.
+ *
+ * This must only be used for flags which are changed with the folio
+ * lock held. For example, it is unsafe to use for PG_dirty as that
+ * can be set without the folio lock held. It can also only be used
+ * on flags which are in the range 0-6 as some of the implementations
+ * only affect those bits.
+ *
+ * Return: Whether there are tasks waiting on the folio.
+ */
+static inline bool folio_xor_flags_has_waiters(struct folio *folio,
+ unsigned long mask)
+{
+ return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0));
+}
+
/**
* folio_test_uptodate - Is this folio up to date?
* @folio: The folio.
diff --git a/mm/filemap.c b/mm/filemap.c
index ab8f798eb0af..3dad2615af41 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1499,7 +1499,7 @@ void folio_unlock(struct folio *folio)
BUILD_BUG_ON(PG_waiters != 7);
BUILD_BUG_ON(PG_locked > 7);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- if (xor_unlock_is_negative_byte(1 << PG_locked, folio_flags(folio, 0)))
+ if (folio_xor_flags_has_waiters(folio, 1 << PG_locked))
folio_wake_bit(folio, PG_locked);
}
EXPORT_SYMBOL(folio_unlock);
@@ -1520,9 +1520,17 @@ EXPORT_SYMBOL(folio_unlock);
*/
void folio_end_read(struct folio *folio, bool success)
{
+ unsigned long mask = 1 << PG_locked;
+
+ /* Must be in bottom byte for x86 to work */
+ BUILD_BUG_ON(PG_uptodate > 7);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio);
+
if (likely(success))
- folio_mark_uptodate(folio);
- folio_unlock(folio);
+ mask |= 1 << PG_uptodate;
+ if (folio_xor_flags_has_waiters(folio, mask))
+ folio_wake_bit(folio, PG_locked);
}
EXPORT_SYMBOL(folio_end_read);
--
2.40.1
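
A worked example of why a single XOR is safe here, using PG_locked = 0
and PG_waiters = 7 as asserted above, with PG_uptodate shown at bit 2
purely for concreteness:

	/* flags byte on entry: locked, not uptodate, one waiter */
	unsigned long byte = 0x81;			/* 1000 0001 */
	unsigned long mask = (1 << 0) | (1 << 2);	/* PG_locked | PG_uptodate */

	byte ^= mask;					/* 1000 0100 = 0x84 */
	/* PG_locked is cleared, PG_uptodate is set, and bit 7 (PG_waiters)
	 * survives unchanged, telling the caller to wake the waiter. */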
* [PATCH v2 16/17] mm: Make __folio_end_writeback() return void
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Rather than check the result of test-and-clear, just check that we have
the writeback bit set at the start. This wouldn't catch every case, but
it's good enough (and enables the next patch).
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/filemap.c | 9 +++++++--
mm/internal.h | 2 +-
mm/page-writeback.c | 38 ++++++++++++++++----------------------
3 files changed, 24 insertions(+), 25 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 3dad2615af41..ddcced4638b5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1595,9 +1595,15 @@ EXPORT_SYMBOL(folio_wait_private_2_killable);
/**
* folio_end_writeback - End writeback against a folio.
* @folio: The folio.
+ *
+ * The folio must actually be under writeback.
+ *
+ * Context: May be called from process or interrupt context.
*/
void folio_end_writeback(struct folio *folio)
{
+ VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio);
+
/*
* folio_test_clear_reclaim() could be used here but it is an
* atomic operation and overkill in this particular case. Failing
@@ -1617,8 +1623,7 @@ void folio_end_writeback(struct folio *folio)
* reused before the folio_wake().
*/
folio_get(folio);
- if (!__folio_end_writeback(folio))
- BUG();
+ __folio_end_writeback(folio);
smp_mb__after_atomic();
folio_wake(folio, PG_writeback);
diff --git a/mm/internal.h b/mm/internal.h
index 30cf724ddbce..ccb08dd9b5ec 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -105,7 +105,7 @@ static inline void wake_throttle_isolated(pg_data_t *pgdat)
vm_fault_t do_swap_page(struct vm_fault *vmf);
void folio_rotate_reclaimable(struct folio *folio);
-bool __folio_end_writeback(struct folio *folio);
+void __folio_end_writeback(struct folio *folio);
void deactivate_file_folio(struct folio *folio);
void folio_activate(struct folio *folio);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b8d3d7040a50..410b53e888e3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2940,11 +2940,10 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb)
spin_unlock_irqrestore(&wb->work_lock, flags);
}
-bool __folio_end_writeback(struct folio *folio)
+void __folio_end_writeback(struct folio *folio)
{
long nr = folio_nr_pages(folio);
struct address_space *mapping = folio_mapping(folio);
- bool ret;
folio_memcg_lock(folio);
if (mapping && mapping_use_writeback_tags(mapping)) {
@@ -2953,19 +2952,16 @@ bool __folio_end_writeback(struct folio *folio)
unsigned long flags;
xa_lock_irqsave(&mapping->i_pages, flags);
- ret = folio_test_clear_writeback(folio);
- if (ret) {
- __xa_clear_mark(&mapping->i_pages, folio_index(folio),
- PAGECACHE_TAG_WRITEBACK);
- if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
- struct bdi_writeback *wb = inode_to_wb(inode);
-
- wb_stat_mod(wb, WB_WRITEBACK, -nr);
- __wb_writeout_add(wb, nr);
- if (!mapping_tagged(mapping,
- PAGECACHE_TAG_WRITEBACK))
- wb_inode_writeback_end(wb);
- }
+ folio_test_clear_writeback(folio);
+ __xa_clear_mark(&mapping->i_pages, folio_index(folio),
+ PAGECACHE_TAG_WRITEBACK);
+ if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
+ struct bdi_writeback *wb = inode_to_wb(inode);
+
+ wb_stat_mod(wb, WB_WRITEBACK, -nr);
+ __wb_writeout_add(wb, nr);
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+ wb_inode_writeback_end(wb);
}
if (mapping->host && !mapping_tagged(mapping,
@@ -2974,15 +2970,13 @@ bool __folio_end_writeback(struct folio *folio)
xa_unlock_irqrestore(&mapping->i_pages, flags);
} else {
- ret = folio_test_clear_writeback(folio);
- }
- if (ret) {
- lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr);
- zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
- node_stat_mod_folio(folio, NR_WRITTEN, nr);
+ folio_test_clear_writeback(folio);
}
+
+ lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr);
+ zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
+ node_stat_mod_folio(folio, NR_WRITTEN, nr);
folio_memcg_unlock(folio);
- return ret;
}
bool __folio_start_writeback(struct folio *folio, bool keep_write)
--
2.40.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
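The shape of this change is worth noting: a helper that used to report
whether it did anything becomes one whose caller guarantees there is
something to do. A minimal userspace model of the before/after calling
convention, assuming C11 atomics and an illustrative bit position (the
kernel uses VM_BUG_ON_FOLIO rather than assert):

/* Userspace model only; not kernel code. */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

#define PG_writeback	2	/* illustrative position */

static _Atomic unsigned long flags = 1UL << PG_writeback;

/* Old convention: clear and report, so every caller must check. */
static bool end_writeback_old(void)
{
	unsigned long old = atomic_fetch_and(&flags,
					     ~(1UL << PG_writeback));
	return old & (1UL << PG_writeback);
}

/* New convention: assert the bit on entry, clear unconditionally. */
static void end_writeback_new(void)
{
	assert(atomic_load(&flags) & (1UL << PG_writeback));
	atomic_fetch_and(&flags, ~(1UL << PG_writeback));
}

int main(void)
{
	end_writeback_new();		/* bit was set: fine */
	assert(!end_writeback_old());	/* already clear: reports false */
	return 0;
}

As the commit message says, the up-front assertion won't catch a racing
second caller the way checking the test-and-clear result did, but it
catches the common misuse and frees the return value to be repurposed in
the next patch.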
* [PATCH v2 17/17] mm: Use folio_xor_flags_has_waiters() in folio_end_writeback()
2023-10-04 16:53 [PATCH v2 00/17] Add folio_end_read Matthew Wilcox (Oracle)
` (15 preceding siblings ...)
2023-10-04 16:53 ` [PATCH v2 16/17] mm: Make __folio_end_writeback() return void Matthew Wilcox (Oracle)
@ 2023-10-04 16:53 ` Matthew Wilcox (Oracle)
16 siblings, 0 replies; 23+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-10-04 16:53 UTC (permalink / raw)
To: Andrew Morton
Cc: Matthew Wilcox (Oracle), linux-mm, linux-fsdevel, linux-arch,
torvalds, npiggin
Match how folio_unlock() works by combining the test for PG_waiters with
the clearing of PG_writeback. This should yield a small performance win,
and it removes the last user of folio_wake().
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/filemap.c | 15 +++------------
mm/internal.h | 2 +-
mm/page-writeback.c | 9 ++++++---
3 files changed, 10 insertions(+), 16 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index ddcced4638b5..56bfba546be7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1177,13 +1177,6 @@ static void folio_wake_bit(struct folio *folio, int bit_nr)
spin_unlock_irqrestore(&q->lock, flags);
}
-static void folio_wake(struct folio *folio, int bit)
-{
- if (!folio_test_waiters(folio))
- return;
- folio_wake_bit(folio, bit);
-}
-
/*
* A choice of three behaviors for folio_wait_bit_common():
*/
@@ -1620,13 +1613,11 @@ void folio_end_writeback(struct folio *folio)
* Writeback does not hold a folio reference of its own, relying
* on truncation to wait for the clearing of PG_writeback.
* But here we must make sure that the folio is not freed and
- * reused before the folio_wake().
+ * reused before the folio_wake_bit().
*/
folio_get(folio);
- __folio_end_writeback(folio);
-
- smp_mb__after_atomic();
- folio_wake(folio, PG_writeback);
+ if (__folio_end_writeback(folio))
+ folio_wake_bit(folio, PG_writeback);
acct_reclaim_writeback(folio);
folio_put(folio);
}
diff --git a/mm/internal.h b/mm/internal.h
index ccb08dd9b5ec..30cf724ddbce 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -105,7 +105,7 @@ static inline void wake_throttle_isolated(pg_data_t *pgdat)
vm_fault_t do_swap_page(struct vm_fault *vmf);
void folio_rotate_reclaimable(struct folio *folio);
-void __folio_end_writeback(struct folio *folio);
+bool __folio_end_writeback(struct folio *folio);
void deactivate_file_folio(struct folio *folio);
void folio_activate(struct folio *folio);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 410b53e888e3..c52514fe1d23 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2940,10 +2940,11 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb)
spin_unlock_irqrestore(&wb->work_lock, flags);
}
-void __folio_end_writeback(struct folio *folio)
+bool __folio_end_writeback(struct folio *folio)
{
long nr = folio_nr_pages(folio);
struct address_space *mapping = folio_mapping(folio);
+ bool ret;
folio_memcg_lock(folio);
if (mapping && mapping_use_writeback_tags(mapping)) {
@@ -2952,7 +2953,7 @@ void __folio_end_writeback(struct folio *folio)
unsigned long flags;
xa_lock_irqsave(&mapping->i_pages, flags);
- folio_test_clear_writeback(folio);
+ ret = folio_xor_flags_has_waiters(folio, 1 << PG_writeback);
__xa_clear_mark(&mapping->i_pages, folio_index(folio),
PAGECACHE_TAG_WRITEBACK);
if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
@@ -2970,13 +2971,15 @@ void __folio_end_writeback(struct folio *folio)
xa_unlock_irqrestore(&mapping->i_pages, flags);
} else {
- folio_test_clear_writeback(folio);
+ ret = folio_xor_flags_has_waiters(folio, 1 << PG_writeback);
}
lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr);
zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
node_stat_mod_folio(folio, NR_WRITTEN, nr);
folio_memcg_unlock(folio);
+
+ return ret;
}
bool __folio_start_writeback(struct folio *folio, bool keep_write)
--
2.40.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
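With this final patch applied, the writeback completion path gets the
same treatment folio_unlock() already had: one atomic fetch-xor clears
PG_writeback and tests PG_waiters, and the wait-queue lookup in
folio_wake_bit() runs only when a waiter actually exists. A hypothetical
sketch, not code from this series, of a filesystem write-completion
handler that benefits; myfs_write_endio and the error message are
invented, while bio_for_each_folio_all() and folio_end_writeback() are
existing kernel APIs:

/* Hypothetical filesystem write-completion handler (sketch only). */
static void myfs_write_endio(struct bio *bio)
{
	struct folio_iter fi;

	if (bio->bi_status)
		pr_err("myfs: write error %d\n", bio->bi_status);

	/*
	 * Each call clears PG_writeback and tests PG_waiters in a
	 * single atomic operation; uncontended folios never touch
	 * the hashed wait queues.
	 */
	bio_for_each_folio_all(fi, bio)
		folio_end_writeback(fi.folio);

	bio_put(bio);
}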