From: Jens Axboe <axboe@kernel.dk>
To: linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
linux-block@vger.kernel.org
Cc: willy@infradead.org, clm@fb.com, Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED
Date: Tue, 10 Dec 2019 13:43:02 -0700 [thread overview]
Message-ID: <20191210204304.12266-4-axboe@kernel.dk> (raw)
In-Reply-To: <20191210204304.12266-1-axboe@kernel.dk>
If RWF_UNCACHED is set for io_uring (or pwritev2(2)), we'll drop the
page cache pages that the buffered write itself instantiated. If a page
was already present in the page cache (i.e. the write did not
instantiate it), we leave it alone. This provides similar semantics to
reads with RWF_UNCACHED set.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
include/linux/fs.h | 5 +++
mm/filemap.c | 85 +++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 85 insertions(+), 5 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bf58db1bc032..7ea3dfdd9aa5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -285,6 +285,7 @@ enum positive_aop_returns {
#define AOP_FLAG_NOFS 0x0002 /* used by filesystem to direct
* helper code (eg buffer layer)
* to clear GFP_FS from alloc */
+#define AOP_FLAG_UNCACHED 0x0004
/*
* oh the beauties of C type declarations.
@@ -3106,6 +3107,10 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
extern ssize_t generic_perform_write(struct file *, struct iov_iter *,
struct kiocb *);
+struct pagevec;
+extern void write_drop_cached_pages(struct pagevec *pvec,
+ struct address_space *mapping);
+
ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
rwf_t flags);
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
diff --git a/mm/filemap.c b/mm/filemap.c
index fe37bd2b2630..2e36129ebe38 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3287,10 +3287,12 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index, unsigned flags)
{
struct page *page;
- int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
+ int fgp_flags = FGP_LOCK|FGP_WRITE;
if (flags & AOP_FLAG_NOFS)
fgp_flags |= FGP_NOFS;
+ if (!(flags & AOP_FLAG_UNCACHED))
+ fgp_flags |= FGP_CREAT;
page = pagecache_get_page(mapping, index, fgp_flags,
mapping_gfp_mask(mapping));
@@ -3301,21 +3303,65 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
}
EXPORT_SYMBOL(grab_cache_page_write_begin);
+/*
+ * Start writeback on the pages in @pvec, and then try to remove those pages
+ * from the page cache. Used with RWF_UNCACHED.
+ */
+void write_drop_cached_pages(struct pagevec *pvec,
+ struct address_space *mapping)
+{
+ loff_t start, end;
+ int i;
+
+ end = 0;
+ start = LLONG_MAX;
+ for (i = 0; i < pagevec_count(pvec); i++) {
+ loff_t off = page_offset(pvec->pages[i]);
+ if (off < start)
+ start = off;
+ if (off > end)
+ end = off;
+ }
+
+ __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_NONE);
+
+ for (i = 0; i < pagevec_count(pvec); i++) {
+ struct page *page = pvec->pages[i];
+
+ lock_page(page);
+ if (page->mapping == mapping) {
+ wait_on_page_writeback(page);
+ if (!page_has_private(page) ||
+ try_to_release_page(page, 0))
+ remove_mapping(mapping, page);
+ }
+ unlock_page(page);
+ }
+ pagevec_release(pvec);
+}
+EXPORT_SYMBOL_GPL(write_drop_cached_pages);
+
+#define GPW_PAGE_BATCH 16
+
ssize_t generic_perform_write(struct file *file,
struct iov_iter *i, struct kiocb *iocb)
{
struct address_space *mapping = file->f_mapping;
const struct address_space_operations *a_ops = mapping->a_ops;
loff_t pos = iocb->ki_pos;
+ struct pagevec pvec;
long status = 0;
ssize_t written = 0;
unsigned int flags = 0;
+ pagevec_init(&pvec);
+
do {
struct page *page;
unsigned long offset; /* Offset into pagecache page */
unsigned long bytes; /* Bytes to write to page */
size_t copied; /* Bytes copied from user */
+ bool drop_page = false; /* drop page after IO */
void *fsdata;
offset = (pos & (PAGE_SIZE - 1));
@@ -3323,6 +3369,9 @@ ssize_t generic_perform_write(struct file *file,
iov_iter_count(i));
again:
+ if (iocb->ki_flags & IOCB_UNCACHED)
+ flags |= AOP_FLAG_UNCACHED;
+
/*
* Bring in the user page that we will copy from _first_.
* Otherwise there's a nasty deadlock on copying from the
@@ -3343,10 +3392,17 @@ ssize_t generic_perform_write(struct file *file,
break;
}
+retry:
status = a_ops->write_begin(file, mapping, pos, bytes, flags,
&page, &fsdata);
- if (unlikely(status < 0))
+ if (unlikely(status < 0)) {
+ if (status == -ENOMEM && (flags & AOP_FLAG_UNCACHED)) {
+ drop_page = true;
+ flags &= ~AOP_FLAG_UNCACHED;
+ goto retry;
+ }
break;
+ }
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
@@ -3354,10 +3410,16 @@ ssize_t generic_perform_write(struct file *file,
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
flush_dcache_page(page);
+ if (drop_page)
+ get_page(page);
+
status = a_ops->write_end(file, mapping, pos, bytes, copied,
page, fsdata);
- if (unlikely(status < 0))
+ if (unlikely(status < 0)) {
+ if (drop_page)
+ put_page(page);
break;
+ }
copied = status;
cond_resched();
@@ -3374,14 +3436,27 @@ ssize_t generic_perform_write(struct file *file,
*/
bytes = min_t(unsigned long, PAGE_SIZE - offset,
iov_iter_single_seg_count(i));
+ if (drop_page)
+ put_page(page);
goto again;
}
+ if (drop_page &&
+ ((pos >> PAGE_SHIFT) != ((pos + copied) >> PAGE_SHIFT))) {
+ if (!pagevec_add(&pvec, page))
+ write_drop_cached_pages(&pvec, mapping);
+ } else {
+ if (drop_page)
+ put_page(page);
+ balance_dirty_pages_ratelimited(mapping);
+ }
+
pos += copied;
written += copied;
-
- balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(i));
+ if (pagevec_count(&pvec))
+ write_drop_cached_pages(&pvec, mapping);
+
return written ? written : status;
}
EXPORT_SYMBOL(generic_perform_write);
--
2.24.0
next prev parent reply other threads:[~2019-12-10 20:43 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-10 20:42 [PATCHSET v2 0/5] Support for RWF_UNCACHED Jens Axboe
2019-12-10 20:43 ` [PATCH 1/5] fs: add read support " Jens Axboe
2019-12-10 20:43 ` [PATCH 2/5] mm: make generic_perform_write() take a struct kiocb Jens Axboe
2019-12-10 20:43 ` Jens Axboe [this message]
2019-12-14 0:01 ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Andrew Morton
2019-12-10 20:43 ` [PATCH 4/5] iomap: pass in the write_begin/write_end flags to iomap_actor Jens Axboe
2019-12-10 20:43 ` [PATCH 5/5] iomap: support RWF_UNCACHED for buffered writes Jens Axboe
2019-12-11 1:14 ` Dave Chinner
2019-12-11 14:44 ` Jens Axboe
-- strict thread matches above, loose matches on Subject: below --
2019-12-12 19:01 [PATCHSET v4 0/5] Support for RWF_UNCACHED Jens Axboe
2019-12-12 19:01 ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Jens Axboe
2019-12-11 15:29 [PATCHSET v3 0/5] Support for RWF_UNCACHED Jens Axboe
2019-12-11 15:29 ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Jens Axboe
2019-12-10 16:24 [PATCHSET 0/5] Support for RWF_UNCACHED Jens Axboe
2019-12-10 16:24 ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Jens Axboe
2019-12-10 16:55 ` Matthew Wilcox
2019-12-10 17:02 ` Jens Axboe
2019-12-10 18:35 ` Chris Mason
2019-12-10 18:58 ` Matthew Wilcox
2019-12-10 19:10 ` Jens Axboe
2019-12-11 0:23 ` Dave Chinner
2019-12-11 0:28 ` Dave Chinner
2019-12-11 14:39 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191210204304.12266-4-axboe@kernel.dk \
--to=axboe@kernel.dk \
--cc=clm@fb.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.