[PATCH v1 4/6] mm:swap: use on-stack-bio for BDI_CAP_SYNC devices

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Minchan Kim <minchan@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Ross Zwisler <ross.zwisler@linux.intel.com>,
	"karam . lee" <karam.lee@lge.com>,
	seungho1.park@lge.com, Matthew Wilcox <willy@infradead.org>,
	Christoph Hellwig <hch@lst.de>,
	Dan Williams <dan.j.williams@intel.com>,
	Dave Chinner <david@fromorbit.com>,
	jack@suse.cz, Jens Axboe <axboe@kernel.dk>,
	Vishal Verma <vishal.l.verma@intel.com>,
	linux-nvdimm@lists.01.org, kernel-team <kernel-team@lge.com>,
	Minchan Kim <minchan@kernel.org>
Subject: [PATCH v1 4/6] mm:swap: use on-stack-bio for BDI_CAP_SYNC devices
Date: Tue,  8 Aug 2017 15:50:22 +0900	[thread overview]
Message-ID: <1502175024-28338-5-git-send-email-minchan@kernel.org> (raw)
In-Reply-To: <1502175024-28338-1-git-send-email-minchan@kernel.org>

There is no need to use dynamic bio allocation for BDI_CAP_SYNC
devices. They can use an on-stack bio instead, without having to
wait for a bio allocation from the mempool under heavy memory pressure.

This is especially beneficial for swap devices because the bio mempool
used for swap IO is shared with filesystems. With an on-stack bio,
super-fast swap IO such as zram no longer needs to depend on the
completion of slow eMMC reads/writes.

Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 include/linux/swap.h |   3 +-
 mm/page_io.c         | 123 +++++++++++++++++++++++++++++++++++----------------
 mm/swapfile.c        |   3 ++
 3 files changed, 89 insertions(+), 40 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ae3da979a7b7..6ed9b6423f7d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -152,8 +152,9 @@ enum {
 	SWP_AREA_DISCARD = (1 << 8),	/* single-time swap area discards */
 	SWP_PAGE_DISCARD = (1 << 9),	/* freed swap page-cluster discards */
 	SWP_STABLE_WRITES = (1 << 10),	/* no overwrite PG_writeback pages */
+	SWP_SYNC_IO	= (1<<11),	/* synchronous IO is efficient */
 					/* add others here before... */
-	SWP_SCANNING	= (1 << 11),	/* refcount in scan_swap_map */
+	SWP_SCANNING	= (1 << 12),	/* refcount in scan_swap_map */
 };
 
 #define SWAP_CLUSTER_MAX 32UL
diff --git a/mm/page_io.c b/mm/page_io.c
index 3502a97f7c48..d794fd810773 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -44,7 +44,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
 	return bio;
 }
 
-void end_swap_bio_write(struct bio *bio)
+void end_swap_bio_write_simple(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
@@ -66,6 +66,11 @@ void end_swap_bio_write(struct bio *bio)
 		ClearPageReclaim(page);
 	}
 	end_page_writeback(page);
+}
+
+void end_swap_bio_write(struct bio *bio)
+{
+	end_swap_bio_write_simple(bio);
 	bio_put(bio);
 }
 
@@ -117,10 +122,9 @@ static void swap_slot_free_notify(struct page *page)
 	}
 }
 
-static void end_swap_bio_read(struct bio *bio)
+static void end_swap_bio_read_simple(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
-	struct task_struct *waiter = bio->bi_private;
 
 	if (bio->bi_status) {
 		SetPageError(page);
@@ -136,6 +140,13 @@ static void end_swap_bio_read(struct bio *bio)
 	swap_slot_free_notify(page);
 out:
 	unlock_page(page);
+}
+
+static void end_swap_bio_read(struct bio *bio)
+{
+	struct task_struct *waiter = bio->bi_private;
+
+	end_swap_bio_read_simple(bio);
 	WRITE_ONCE(bio->bi_private, NULL);
 	bio_put(bio);
 	wake_up_process(waiter);
@@ -275,7 +286,6 @@ static inline void count_swpout_vm_event(struct page *page)
 
 int __swap_writepage(struct page *page, struct writeback_control *wbc)
 {
-	struct bio *bio;
 	int ret;
 	struct swap_info_struct *sis = page_swap_info(page);
 
@@ -328,25 +338,43 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	ret = 0;
-	bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
-	if (bio == NULL) {
-		set_page_dirty(page);
+	if (!(sis->flags & SWP_SYNC_IO)) {
+		struct bio *bio;
+
+		bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
+		if (bio == NULL) {
+			set_page_dirty(page);
+			unlock_page(page);
+			ret = -ENOMEM;
+			goto out;
+		}
+		bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+		set_page_writeback(page);
 		unlock_page(page);
-		ret = -ENOMEM;
-		goto out;
+		submit_bio(bio);
+	} else {
+
+		/* on-stack-bio */
+		struct bio sbio;
+		struct bio_vec bvec;
+
+		bio_init(&sbio, &bvec, 1);
+		sbio.bi_bdev = sis->bdev;
+		sbio.bi_iter.bi_sector = swap_page_sector(page);
+		sbio.bi_end_io = end_swap_bio_write_simple;
+		bio_add_page(&sbio, page, PAGE_SIZE, 0);
+		bio_set_op_attrs(&sbio, REQ_OP_WRITE, wbc_to_write_flags(wbc));
+		set_page_writeback(page);
+		unlock_page(page);
+		submit_bio(&sbio);
 	}
-	bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 	count_swpout_vm_event(page);
-	set_page_writeback(page);
-	unlock_page(page);
-	submit_bio(bio);
 out:
 	return ret;
 }
 
 int swap_readpage(struct page *page, bool do_poll)
 {
-	struct bio *bio;
 	int ret = 0;
 	struct swap_info_struct *sis = page_swap_info(page);
 	blk_qc_t qc;
@@ -383,33 +411,50 @@ int swap_readpage(struct page *page, bool do_poll)
 	}
 
 	ret = 0;
-	bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
-	if (bio == NULL) {
-		unlock_page(page);
-		ret = -ENOMEM;
-		goto out;
-	}
-	bdev = bio->bi_bdev;
-	/*
-	 * Keep this task valid during swap readpage because the oom killer may
-	 * attempt to access it in the page fault retry time check.
-	 */
-	get_task_struct(current);
-	bio->bi_private = current;
-	bio_set_op_attrs(bio, REQ_OP_READ, 0);
 	count_vm_event(PSWPIN);
-	bio_get(bio);
-	qc = submit_bio(bio);
-	while (do_poll) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!READ_ONCE(bio->bi_private))
-			break;
-
-		if (!blk_mq_poll(bdev_get_queue(bdev), qc))
-			break;
+	if (!(sis->flags & SWP_SYNC_IO)) {
+		struct bio *bio;
+
+		bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
+		if (bio == NULL) {
+			unlock_page(page);
+			ret = -ENOMEM;
+			goto out;
+		}
+		bdev = bio->bi_bdev;
+		/*
+		 * Keep this task valid during swap readpage because
+		 * the oom killer may attempt to access it
+		 * in the page fault retry time check.
+		 */
+		get_task_struct(current);
+		bio->bi_private = current;
+		bio_set_op_attrs(bio, REQ_OP_READ, 0);
+		bio_get(bio);
+		qc = submit_bio(bio);
+		while (do_poll) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (!READ_ONCE(bio->bi_private))
+				break;
+
+			if (!blk_mq_poll(bdev_get_queue(bdev), qc))
+				break;
+		}
+		__set_current_state(TASK_RUNNING);
+		bio_put(bio);
+	} else {
+		/* on-stack-bio */
+		struct bio sbio;
+		struct bio_vec bvec;
+
+		bio_init(&sbio, &bvec, 1);
+		sbio.bi_bdev = sis->bdev;
+		sbio.bi_iter.bi_sector = swap_page_sector(page);
+		sbio.bi_end_io = end_swap_bio_read_simple;
+		bio_add_page(&sbio, page, PAGE_SIZE, 0);
+		bio_set_op_attrs(&sbio, REQ_OP_READ, 0);
+		submit_bio(&sbio);
 	}
-	__set_current_state(TASK_RUNNING);
-	bio_put(bio);
 
 out:
 	return ret;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 42eff9e4e972..e916b325b0b7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3113,6 +3113,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
 		p->flags |= SWP_STABLE_WRITES;
 
+	if (bdi_cap_synchronous_io(inode_to_bdi(inode)))
+		p->flags |= SWP_SYNC_IO;
+
 	if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
 		int cpu;
 		unsigned long ci, nr_cluster;
-- 
2.7.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

next prev parent reply	other threads:[~2017-08-08  6:50 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-08  6:50 [PATCH v1 0/6] Remove rw_page Minchan Kim
2017-08-08  6:50 ` [PATCH v1 1/6] bdi: introduce BDI_CAP_SYNC Minchan Kim
2017-08-08  6:50 ` [PATCH v1 2/6] fs: use on-stack-bio if backing device has BDI_CAP_SYNC capability Minchan Kim
2017-08-08 12:49   ` Matthew Wilcox
2017-08-08 13:29     ` Matthew Wilcox
2017-08-09  1:51       ` Minchan Kim
2017-08-09  2:31         ` Matthew Wilcox
2017-08-09  2:41           ` Minchan Kim
2017-08-10  3:04             ` Matthew Wilcox
2017-08-10  3:06               ` Dan Williams
2017-08-11 10:46                 ` Christoph Hellwig
2017-08-11 14:26                   ` Jens Axboe
2017-08-14  8:50                     ` Minchan Kim
2017-08-14 14:36                       ` Jens Axboe
2017-08-14 15:06                         ` Minchan Kim
2017-08-14 15:14                           ` Jens Axboe
2017-08-14 15:31                             ` Minchan Kim
2017-08-14 15:38                               ` Jens Axboe
2017-08-14 16:17                                 ` Jens Axboe
2017-08-16  4:48                                   ` Minchan Kim
2017-08-16 15:56                                     ` Jens Axboe
2017-08-21  6:13                                       ` Minchan Kim
2017-08-14  8:48                   ` Minchan Kim
2017-08-10  4:00               ` Minchan Kim
2017-08-09  1:48     ` Minchan Kim
2017-08-08  6:50 ` [PATCH v1 3/6] mm:swap: remove end_swap_bio_write argument Minchan Kim
2017-08-08  6:50 ` Minchan Kim [this message]
2017-08-08  6:50 ` [PATCH v1 5/6] zram: remove zram_rw_page Minchan Kim
2017-08-08  7:02   ` Sergey Senozhatsky
2017-08-08  8:13     ` Minchan Kim
2017-08-08  8:23       ` Sergey Senozhatsky
2017-08-08 15:48         ` Matthew Wilcox
2017-08-08  6:50 ` [PATCH v1 6/6] fs: remove rw_page Minchan Kim

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:ae3da979a7b dfblob:6ed9b6423f7 dfblob:3502a97f7c4
dfblob:d794fd81077 dfblob:42eff9e4e97 dfblob:e916b325b0b )
 OR (
bs:"[PATCH v1 4/6] mm:swap: use on-stack-bio for BDI_CAP_SYNC devices" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1502175024-28338-5-git-send-email-minchan@kernel.org \
    --to=minchan@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=axboe@kernel.dk \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=karam.lee@lge.com \
    --cc=kernel-team@lge.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=ross.zwisler@linux.intel.com \
    --cc=seungho1.park@lge.com \
    --cc=vishal.l.verma@intel.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).