linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
	linux-block@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED
Date: Tue, 10 Dec 2019 09:24:52 -0700	[thread overview]
Message-ID: <20191210162454.8608-4-axboe@kernel.dk> (raw)
In-Reply-To: <20191210162454.8608-1-axboe@kernel.dk>

If RWF_UNCACHED is set for io_uring (or pwritev2(2)), we'll drop the
cache instantiated for buffered writes. If new pages aren't
instantiated, we leave them alone. This provides similar semantics to
reads with RWF_UNCACHED set.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h |  3 ++
 mm/filemap.c       | 78 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bf58db1bc032..bcf486c132a8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -285,6 +285,7 @@ enum positive_aop_returns {
 #define AOP_FLAG_NOFS			0x0002 /* used by filesystem to direct
 						* helper code (eg buffer layer)
 						* to clear GFP_FS from alloc */
+#define AOP_FLAG_UNCACHED		0x0004
 
 /*
  * oh the beauties of C type declarations.
@@ -3105,6 +3106,8 @@ extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *,
 				     struct kiocb *);
+extern void write_drop_cached_pages(struct page **,
+				    struct address_space *mapping, unsigned *);
 
 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		rwf_t flags);
diff --git a/mm/filemap.c b/mm/filemap.c
index fe37bd2b2630..d6171bf705f9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3287,10 +3287,12 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 					pgoff_t index, unsigned flags)
 {
 	struct page *page;
-	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
+	int fgp_flags = FGP_LOCK|FGP_WRITE;
 
 	if (flags & AOP_FLAG_NOFS)
 		fgp_flags |= FGP_NOFS;
+	if (!(flags & AOP_FLAG_UNCACHED))
+		fgp_flags |= FGP_CREAT;
 
 	page = pagecache_get_page(mapping, index, fgp_flags,
 			mapping_gfp_mask(mapping));
@@ -3301,21 +3303,67 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);
 
+/*
+ * Start writeback on the pages in pgs[], and then try and remove those pages
+ * from the page cached. Used with RWF_UNCACHED.
+ */
+void write_drop_cached_pages(struct page **pgs, struct address_space *mapping,
+			     unsigned *nr)
+{
+	loff_t start, end;
+	int i;
+
+	end = 0;
+	start = LLONG_MAX;
+	for (i = 0; i < *nr; i++) {
+		struct page *page = pgs[i];
+		loff_t off;
+
+		off = (loff_t) page_to_index(page) << PAGE_SHIFT;
+		if (off < start)
+			start = off;
+		if (off > end)
+			end = off;
+		get_page(page);
+	}
+
+	__filemap_fdatawrite_range(mapping, start, end, WB_SYNC_NONE);
+
+	for (i = 0; i < *nr; i++) {
+		struct page *page = pgs[i];
+
+		lock_page(page);
+		if (page->mapping == mapping) {
+			wait_on_page_writeback(page);
+			if (!page_has_private(page) ||
+			    try_to_release_page(page, 0))
+				remove_mapping(mapping, page);
+		}
+		unlock_page(page);
+	}
+	*nr = 0;
+}
+EXPORT_SYMBOL_GPL(write_drop_cached_pages);
+
+#define GPW_PAGE_BATCH		16
+
 ssize_t generic_perform_write(struct file *file,
 				struct iov_iter *i, struct kiocb *iocb)
 {
 	struct address_space *mapping = file->f_mapping;
 	const struct address_space_operations *a_ops = mapping->a_ops;
+	struct page *drop_pages[GPW_PAGE_BATCH];
 	loff_t pos = iocb->ki_pos;
 	long status = 0;
 	ssize_t written = 0;
-	unsigned int flags = 0;
+	unsigned int flags = 0, drop_nr = 0;
 
 	do {
 		struct page *page;
 		unsigned long offset;	/* Offset into pagecache page */
 		unsigned long bytes;	/* Bytes to write to page */
 		size_t copied;		/* Bytes copied from user */
+		bool drop_page = false;	/* drop page after IO */
 		void *fsdata;
 
 		offset = (pos & (PAGE_SIZE - 1));
@@ -3323,6 +3371,9 @@ ssize_t generic_perform_write(struct file *file,
 						iov_iter_count(i));
 
 again:
+		if (iocb->ki_flags & IOCB_UNCACHED)
+			flags |= AOP_FLAG_UNCACHED;
+
 		/*
 		 * Bring in the user page that we will copy from _first_.
 		 * Otherwise there's a nasty deadlock on copying from the
@@ -3343,10 +3394,17 @@ ssize_t generic_perform_write(struct file *file,
 			break;
 		}
 
+retry:
 		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
 						&page, &fsdata);
-		if (unlikely(status < 0))
+		if (unlikely(status < 0)) {
+			if (status == -ENOMEM && (flags & AOP_FLAG_UNCACHED)) {
+				drop_page = true;
+				flags &= ~AOP_FLAG_UNCACHED;
+				goto retry;
+			}
 			break;
+		}
 
 		if (mapping_writably_mapped(mapping))
 			flush_dcache_page(page);
@@ -3376,12 +3434,22 @@ ssize_t generic_perform_write(struct file *file,
 						iov_iter_single_seg_count(i));
 			goto again;
 		}
+		if (drop_page &&
+		    ((pos >> PAGE_SHIFT) != ((pos + copied) >> PAGE_SHIFT))) {
+			drop_pages[drop_nr] = page;
+			if (++drop_nr == GPW_PAGE_BATCH)
+				write_drop_cached_pages(drop_pages, mapping,
+								&drop_nr);
+		} else
+			balance_dirty_pages_ratelimited(mapping);
+
 		pos += copied;
 		written += copied;
-
-		balance_dirty_pages_ratelimited(mapping);
 	} while (iov_iter_count(i));
 
+	if (drop_nr)
+		write_drop_cached_pages(drop_pages, mapping, &drop_nr);
+
 	return written ? written : status;
 }
 EXPORT_SYMBOL(generic_perform_write);
-- 
2.24.0


  parent reply	other threads:[~2019-12-10 16:25 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-10 16:24 [PATCHSET 0/5] Support for RWF_UNCACHED Jens Axboe
2019-12-10 16:24 ` [PATCH 1/5] fs: add read support " Jens Axboe
2019-12-10 16:24 ` [PATCH 2/5] mm: make generic_perform_write() take a struct kiocb Jens Axboe
2019-12-10 16:24 ` Jens Axboe [this message]
2019-12-10 16:55   ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Matthew Wilcox
2019-12-10 17:02     ` Jens Axboe
2019-12-10 18:35       ` Chris Mason
2019-12-10 18:58       ` Matthew Wilcox
2019-12-10 19:10         ` Jens Axboe
2019-12-11  0:23   ` Dave Chinner
2019-12-11  0:28     ` Dave Chinner
2019-12-11 14:39     ` Jens Axboe
2019-12-10 16:24 ` [PATCH 4/5] iomap: pass in the write_begin/write_end flags to iomap_actor Jens Axboe
2019-12-10 16:24 ` [PATCH 5/5] iomap: support RWF_UNCACHED for buffered writes Jens Axboe
2019-12-10 21:17 ` [PATCHSET 0/5] Support for RWF_UNCACHED Andreas Dilger
2019-12-12 15:47 ` Christoph Hellwig
2019-12-12 15:52   ` Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2019-12-10 20:42 [PATCHSET v2 " Jens Axboe
2019-12-10 20:43 ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Jens Axboe
2019-12-14  0:01   ` Andrew Morton
2019-12-11 15:29 [PATCHSET v3 0/5] Support for RWF_UNCACHED Jens Axboe
2019-12-11 15:29 ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Jens Axboe
2019-12-12 19:01 [PATCHSET v4 0/5] Support for RWF_UNCACHED Jens Axboe
2019-12-12 19:01 ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191210162454.8608-4-axboe@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).