linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk
Cc: linux-kernel@vger.kernel.org, jack@suse.cz, hch@infradead.org,
	hannes@cmpxchg.org, linux-fsdevel@vger.kernel.org,
	vgoyal@redhat.com, lizefan@huawei.com, cgroups@vger.kernel.org,
	linux-mm@kvack.org, mhocko@suse.cz, clm@fb.com,
	fengguang.wu@intel.com, david@fromorbit.com,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH 37/45] writeback: make writeback_control carry the inode_wb_link being served
Date: Tue,  6 Jan 2015 16:26:14 -0500	[thread overview]
Message-ID: <1420579582-8516-38-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1420579582-8516-1-git-send-email-tj@kernel.org>

A wbc (writeback_control) is constructed at the beginning of writeback
and passed throughout the writeback path.  It serves as the control
structure carrying both in and out parameters for writeback of each
inode.  This patch adds wbc->iwbl so that it also carries the cgroup
writeback context.

wbc_set_iwbl(), which is called by writeback_sb_inodes() while kicking
off writeback of each inode, associates the wbc with the iwbl
(inode_wb_link) being serviced.  inode_writeback_iwbl() and
bdi_writeback_wb() are used to determine the iwbl from inode and wb
(bdi_writeback) from bdi being serviced considering wbc->iwbl.  If the
wbc has a specific iwbl associated with it, the iwbl is used to
determine them; otherwise, the root cgroup is assumed.

This allows accessing the current cgroup wb and iwbl being serviced
throughout the writeback path.  __writeback_single_inode(), which used
to assume the root iwbl, are updated to use inode_writeback_iwbl().
writeback_single_inode() now also uses inode_wirteback_iwbl() and
drops @wb and determines it using iwbl_to_wb() instead.  For
clear_page_dirty_for_io() which used to re-lookup the dirty wb using
page_cgwb_dirty(), a new function, clear_page_dirty_for_io_wbc() which
takes additional @wbc and uses bdi_writeback_wb(), is added.

This patch also adds wbc_blkcg_css() which determines whether the
current writebfffack is for a specific cgroup and which cgroup.  This
will be used by future patches.

This propagates the cgroup writeback context throughout most of
writeback path so that the cgroup specific data is accessible without
repeating lookups from page blkcg.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
---
 fs/fs-writeback.c           | 36 ++++++++++++++++++++------
 include/linux/backing-dev.h | 62 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/writeback.h   |  3 +++
 mm/page-writeback.c         | 21 ++++++++++++---
 4 files changed, 111 insertions(+), 11 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index dfcf5dd..562b75f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -604,6 +604,21 @@ static void inode_icgwbls_del(struct inode *inode)
 	}
 }
 
+/**
+ * wbc_set_iwbl - associate a wbc with an iwbl
+ * @wbc: target writeback_control
+ * @iwbl: inode_wb_link to associate @wbc with
+ *
+ * Writeback for @iwbl is about to be performed with @wbc as the control
+ * structure.  Associate @wbc with @iwbl so that writeback implementation
+ * can retrieve @iwbl from @wbc.
+ */
+static inline void wbc_set_iwbl(struct writeback_control *wbc,
+				struct inode_wb_link *iwbl)
+{
+	wbc->iwbl = iwbl;
+}
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static void init_cgwb_dirty_page_context(struct dirty_context *dctx)
@@ -685,6 +700,11 @@ static void inode_icgwbls_del(struct inode *inode)
 {
 }
 
+static inline void wbc_set_iwbl(struct writeback_control *wbc,
+				struct inode_wb_link *iwbl)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 /**
@@ -1019,7 +1039,7 @@ static int
 __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct address_space *mapping = inode->i_mapping;
-	struct inode_wb_link *iwbl = &inode->i_wb_link;
+	struct inode_wb_link *iwbl = inode_writeback_iwbl(inode, wbc);
 	long nr_to_write = wbc->nr_to_write;
 	unsigned dirty;
 	int ret;
@@ -1090,10 +1110,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
  * and does more profound writeback list handling in writeback_sb_inodes().
  */
 static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
-		       struct writeback_control *wbc)
+writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	struct inode_wb_link *iwbl = &inode->i_wb_link;
+	struct inode_wb_link *iwbl = inode_writeback_iwbl(inode, wbc);
+	struct bdi_writeback *wb = iwbl_to_wb(iwbl);
 	int ret = 0;
 
 	spin_lock(&inode->i_lock);
@@ -1222,6 +1242,8 @@ static long writeback_sb_inodes(struct super_block *sb,
 			break;
 		}
 
+		wbc_set_iwbl(&wbc, iwbl);
+
 		/*
 		 * Don't bother with new inodes or inodes being freed, first
 		 * kind does not need periodic writeout yet, and for the latter
@@ -2020,7 +2042,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
  */
 int write_inode_now(struct inode *inode, int sync)
 {
-	struct bdi_writeback *wb = iwbl_to_wb(&inode->i_wb_link);
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
 		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -2032,7 +2053,7 @@ int write_inode_now(struct inode *inode, int sync)
 		wbc.nr_to_write = 0;
 
 	might_sleep();
-	return writeback_single_inode(inode, wb, &wbc);
+	return writeback_single_inode(inode, &wbc);
 }
 EXPORT_SYMBOL(write_inode_now);
 
@@ -2049,8 +2070,7 @@ EXPORT_SYMBOL(write_inode_now);
  */
 int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	return writeback_single_inode(inode, iwbl_to_wb(&inode->i_wb_link),
-				      wbc);
+	return writeback_single_inode(inode, wbc);
 }
 EXPORT_SYMBOL(sync_inode);
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 6c16d10..5a163fa 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -520,6 +520,50 @@ iwbl_lookup(struct inode *inode, struct cgroup_subsys_state *blkcg_css)
 	return iwbl;
 }
 
+/**
+ * inode_writeback_iwbl - determine the inode_wb_link under writeback
+ * @inode: inode under writeback
+ * @wbc: writeback_control in effect
+ *
+ * Called from code path which is writing back @inode with @wbc to
+ * determine the iwbl (inode_wb_link) this writeback is for.  Guaranteed to
+ * return a valid iwbl.
+ */
+static inline struct inode_wb_link *
+inode_writeback_iwbl(struct inode *inode, struct writeback_control *wbc)
+{
+	return wbc->iwbl ?: &inode->i_wb_link;
+}
+
+/**
+ * bdi_writeback_wb - determine the bdi_writeback under writeback
+ * @bdi: backing_dev_info under writeback
+ * @wbc: writeback_control in effect
+ *
+ * Called from code path which is writing back @bdi with @wbc to determine
+ * the wb (bdi_writebck) this writeback is for.  Guaranteed to return a
+ * valid wb.
+ */
+static inline struct bdi_writeback *
+bdi_writeback_wb(struct backing_dev_info *bdi, struct writeback_control *wbc)
+{
+	return wbc->iwbl ? iwbl_to_wb(wbc->iwbl) : &bdi->wb;
+}
+
+/**
+ * wbc_blkcg_css - return the blkcg_css associated with a wbc
+ * @wbc: writeback_control of interest
+ *
+ * Return the blkcg_css of the inode_wb_link @wbc is associated with.  If
+ * @wbc hasn't been associated with an iwbl using wbc_set_iwbl(), %NULL is
+ * returned.
+ */
+static inline struct cgroup_subsys_state *
+wbc_blkcg_css(struct writeback_control *wbc)
+{
+	return wbc->iwbl ? iwbl_to_wb(wbc->iwbl)->blkcg_css : NULL;
+}
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static inline bool mapping_cgwb_enabled(struct address_space *mapping)
@@ -596,6 +640,24 @@ iwbl_lookup(struct inode *inode, struct cgroup_subsys_state *blkcg_css)
 	return &inode->i_wb_link;
 }
 
+static inline struct inode_wb_link *
+inode_writeback_iwbl(struct inode *inode, struct writeback_control *wbc)
+{
+	return &inode->i_wb_link;
+}
+
+static inline struct bdi_writeback *
+bdi_writeback_wb(struct backing_dev_info *bdi, struct writeback_control *wbc)
+{
+	return &bdi->wb;
+}
+
+static inline struct cgroup_subsys_state *
+wbc_blkcg_css(struct writeback_control *wbc)
+{
+	return NULL;
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 static inline int mapping_read_congested(struct address_space *mapping,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 75349bb..dad1953 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -84,6 +84,9 @@ struct writeback_control {
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
+#ifdef CONFIG_CGROUP_WRITEBACK
+	struct inode_wb_link *iwbl;	/* iwbl this writeback is for */
+#endif
 };
 
 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4cf365c..3e31458 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -148,6 +148,9 @@ static struct timer_list writeout_period_timer =
 		TIMER_DEFERRED_INITIALIZER(writeout_period, 0, 0);
 static unsigned long writeout_period_time = 0;
 
+static int clear_page_dirty_for_io_wbc(struct page *page,
+				       struct writeback_control *wbc);
+
 /*
  * Length of period for aging writeout fractions of bdis. This is an
  * arbitrarily chosen number. The longer the period, the slower fractions will
@@ -1993,7 +1996,7 @@ continue_unlock:
 			}
 
 			BUG_ON(PageWriteback(page));
-			if (!clear_page_dirty_for_io(page))
+			if (!clear_page_dirty_for_io_wbc(page, wbc))
 				goto continue_unlock;
 
 			trace_wbc_writepage(wbc, mapping->backing_dev_info);
@@ -2326,7 +2329,8 @@ EXPORT_SYMBOL(set_page_dirty_lock);
  * This incoherency between the page's dirty flag and radix-tree tag is
  * unfortunate, but it only exists while the page is locked.
  */
-int clear_page_dirty_for_io(struct page *page)
+static int clear_page_dirty_for_io_wbc(struct page *page,
+				       struct writeback_control *wbc)
 {
 	struct address_space *mapping = page_mapping(page);
 
@@ -2369,7 +2373,8 @@ int clear_page_dirty_for_io(struct page *page)
 		 * exclusion.
 		 */
 		if (TestClearPageDirty(page)) {
-			struct bdi_writeback *wb = page_cgwb_dirty(page);
+			struct backing_dev_info *bdi = mapping->backing_dev_info;
+			struct bdi_writeback *wb = bdi_writeback_wb(bdi, wbc);
 
 			dec_zone_page_state(page, NR_FILE_DIRTY);
 			dec_wb_stat(wb, WB_RECLAIMABLE);
@@ -2380,6 +2385,16 @@ int clear_page_dirty_for_io(struct page *page)
 	}
 	return TestClearPageDirty(page);
 }
+
+int clear_page_dirty_for_io(struct page *page)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 1,
+	};
+
+	return clear_page_dirty_for_io_wbc(page, &wbc);
+}
 EXPORT_SYMBOL(clear_page_dirty_for_io);
 
 int test_clear_page_writeback(struct page *page)
-- 
2.1.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2015-01-06 21:27 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-06 21:25 [PATCHSET RFC block/for-next] writeback: cgroup writeback support Tejun Heo
2015-01-06 21:25 ` [PATCH 01/45] writeback: add struct dirty_context Tejun Heo
2015-01-06 21:25 ` [PATCH 02/45] writeback: add {CONFIG|BDI_CAP|FS}_CGROUP_WRITEBACK Tejun Heo
2015-01-06 21:25 ` [PATCH 03/45] memcg: encode page_cgflags in the lower bits of page->mem_cgroup Tejun Heo
2015-01-06 21:25 ` [PATCH 04/45] memcg, writeback: implement memcg_blkcg_ptr Tejun Heo
2015-01-06 21:25 ` [PATCH 05/45] writeback: make backing_dev_info host cgroup-specific bdi_writebacks Tejun Heo
2015-01-06 21:25 ` [PATCH 06/45] writeback, blkcg: associate each blkcg_gq with the corresponding bdi_writeback Tejun Heo
2015-01-06 21:25 ` [PATCH 07/45] writeback: attribute stats to the matching per-cgroup bdi_writeback Tejun Heo
2015-01-06 21:25 ` [PATCH 08/45] writeback: let balance_dirty_pages() work on the matching cgroup bdi_writeback Tejun Heo
2015-01-06 21:25 ` [PATCH 09/45] writeback: make congestion functions per bdi_writeback Tejun Heo
2015-01-06 21:25 ` [PATCH 10/45] writeback, blkcg: restructure blk_{set|clear}_queue_congested() Tejun Heo
2015-01-06 21:25 ` [PATCH 11/45] writeback, blkcg: propagate non-root blkcg congestion state Tejun Heo
2015-01-06 21:25 ` [PATCH 12/45] writeback: implement and use mapping_congested() Tejun Heo
2015-01-06 21:25 ` [PATCH 13/45] writeback: implement WB_has_dirty_io wb_state flag Tejun Heo
2015-01-06 21:25 ` [PATCH 14/45] writeback: implement backing_dev_info->tot_write_bandwidth Tejun Heo
2015-01-06 21:25 ` [PATCH 15/45] writeback: make bdi_has_dirty_io() take multiple bdi_writeback's into account Tejun Heo
2015-01-06 21:25 ` [PATCH 16/45] writeback: don't issue wb_writeback_work if clean Tejun Heo
2015-01-06 21:25 ` [PATCH 17/45] writeback: make bdi->min/max_ratio handling cgroup writeback aware Tejun Heo
2015-01-06 21:25 ` [PATCH 18/45] writeback: implement bdi_for_each_wb() Tejun Heo
2015-01-06 21:25 ` [PATCH 19/45] writeback: remove bdi_start_writeback() Tejun Heo
2015-01-06 21:25 ` [PATCH 20/45] writeback: make laptop_mode_timer_fn() handle multiple bdi_writeback's Tejun Heo
2015-01-06 21:25 ` [PATCH 21/45] writeback: make writeback_in_progress() take bdi_writeback instead of backing_dev_info Tejun Heo
2015-01-06 21:25 ` [PATCH 22/45] writeback: make bdi_start_background_writeback() " Tejun Heo
2015-01-06 21:26 ` [PATCH 23/45] writeback: make wakeup_flusher_threads() handle multiple bdi_writeback's Tejun Heo
2015-01-06 21:26 ` [PATCH 24/45] writeback: add wb_writeback_work->auto_free Tejun Heo
2015-01-06 21:26 ` [PATCH 25/45] writeback: implement bdi_wait_for_completion() Tejun Heo
2015-01-06 21:26 ` [PATCH 26/45] writeback: implement wb_wait_for_single_work() Tejun Heo
2015-01-06 21:26 ` [PATCH 27/45] writeback: restructure try_writeback_inodes_sb[_nr]() Tejun Heo
2015-01-06 21:26 ` [PATCH 28/45] writeback: make writeback initiation functions handle multiple bdi_writeback's Tejun Heo
2015-01-06 21:26 ` [PATCH 29/45] writeback: move i_wb_list emptiness test into inode_wb_list_del() from its caller Tejun Heo
2015-01-06 21:26 ` [PATCH 30/45] vfs, writeback: introduce struct inode_wb_link Tejun Heo
2015-01-06 21:26 ` [PATCH 31/45] vfs, writeback: add inode_wb_link->data point to the associated bdi_writeback Tejun Heo
2015-01-06 21:26 ` [PATCH 32/45] vfs, writeback: move inode->dirtied_when into inode->i_wb_link Tejun Heo
2015-01-06 21:26 ` [PATCH 33/45] writeback: minor reorganization of fs/fs-writeback.c Tejun Heo
2015-01-06 21:26 ` [PATCH 34/45] vfs, writeback: implement support for multiple inode_wb_link's Tejun Heo
2015-01-06 21:26 ` [PATCH 35/45] vfs, writeback: implement inode->i_nr_syncs Tejun Heo
2015-01-06 21:26 ` [PATCH 36/45] writeback: dirty inodes against their matching cgroup bdi_writeback's Tejun Heo
2015-01-06 21:26 ` Tejun Heo [this message]
2015-01-06 21:26 ` [PATCH 38/45] writeback: make cyclic writeback cursor cgroup writeback aware Tejun Heo
2015-01-06 21:26 ` [PATCH 39/45] writeback: make DIRTY_PAGES tracking " Tejun Heo
2015-01-06 21:26 ` [PATCH 40/45] writeback: make write_cache_pages() " Tejun Heo
2015-01-06 21:26 ` [PATCH 41/45] writeback: make __writeback_single_inode() " Tejun Heo
2015-01-06 21:26 ` [PATCH 42/45] writeback: make __filemap_fdatawrite_range() croup " Tejun Heo
2015-01-06 21:26 ` [PATCH 43/45] buffer, writeback: make __block_write_full_page() honor cgroup writeback Tejun Heo
2015-01-06 21:26 ` [PATCH 44/45] mpage: make __mpage_writepage() " Tejun Heo
2015-01-06 21:26 ` [PATCH 45/45] ext2: enable cgroup writeback support Tejun Heo
2015-01-06 21:44 ` [PATCHSET RFC block/for-next] writeback: " Tejun Heo
2015-01-07 23:45   ` Dave Chinner
2015-01-09 21:23     ` Tejun Heo
2015-01-10  0:38       ` Dave Chinner
2015-01-10 15:56         ` Tejun Heo
2015-01-10 16:05           ` Tejun Heo
2015-01-08  9:30 ` Jan Kara
2015-01-09 21:36   ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1420579582-8516-38-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=cgroups@vger.kernel.org \
    --cc=clm@fb.com \
    --cc=david@fromorbit.com \
    --cc=fengguang.wu@intel.com \
    --cc=hannes@cmpxchg.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizefan@huawei.com \
    --cc=mhocko@suse.cz \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).