All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk
Cc: linux-kernel@vger.kernel.org, jack@suse.cz, hch@infradead.org,
	hannes@cmpxchg.org, linux-fsdevel@vger.kernel.org,
	vgoyal@redhat.com, lizefan@huawei.com, cgroups@vger.kernel.org,
	linux-mm@kvack.org, mhocko@suse.cz, clm@fb.com,
	fengguang.wu@intel.com, david@fromorbit.com,
	Tejun Heo <tj@kernel.org>,
	Alexander Viro <viro@zeniv.linux.org.uk>
Subject: [PATCH 35/45] vfs, writeback: implement inode->i_nr_syncs
Date: Tue,  6 Jan 2015 16:26:12 -0500	[thread overview]
Message-ID: <1420579582-8516-36-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1420579582-8516-1-git-send-email-tj@kernel.org>

Currently, I_SYNC is used to keep track of whether writeback is in
progress on the inode or not.  With cgroup writeback support, multiple
writebacks could be in progress simultaneously and a single bit in
inode->i_state isn't sufficient.

If CONFIG_CGROUP_WRITEBACK, this patch makes each iwbl (inode_wb_link)
track whether writeback is in progress using the new IWBL_SYNC flag on
iwbl->data and inode->i_nr_syncs aggregate total number of writebacks
in progress on the inode.  New helpers, iwbl_{test|set|clear}_sync(),
iwbl_sync_wakeup() and __iwbl_wait_for_writeback() are added to
manipulate these states and inode_sleep_on_writeback() is converted to
iwbl_sleep_on_writeback().  I_SYNC retains the same meaning - it's set
if any writeback is in progress and cleared if none.

If !CONFIG_CGROUP_WRITEBACK, the helpers simply operate on I_SYNC
directly and there's no behavioral changes compared to before.  When
CONFIG_CGROUP_WRITEBACK, this adds an atomic_t to struct inode.  This
competes for the same left over 4 byte slot w/ i_readcount from
CONFIG_IMA on 64 bit, and, as long as CONFIG_IMA isn't enabled, it
doesn't increase the size of struct inode on 64 bit.

This allows keeping track of writeback in-progress state per cgroup
bdi_writeback.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
---
 fs/fs-writeback.c                | 172 +++++++++++++++++++++++++++++++++------
 include/linux/backing-dev-defs.h |   7 ++
 include/linux/fs.h               |   3 +
 mm/backing-dev.c                 |   1 +
 4 files changed, 160 insertions(+), 23 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d10c231..df99b5b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -430,26 +430,113 @@ restart:
 	rcu_read_unlock();
 }
 
+/**
+ * iwbl_test_sync - test whether writeback is in progress on an inode_wb_link
+ * @iwbl: target inode_wb_link
+ *
+ * Test whether writeback is in progress for the inode on the bdi_writeback
+ * specified by @iwbl.  The caller is responsible for synchornization.
+ */
+static bool iwbl_test_sync(struct inode_wb_link *iwbl)
+{
+	return test_bit(IWBL_SYNC, &iwbl->data);
+}
+
+/**
+ * iwbl_set_sync - mark an inode_wb_link that writeback is in progress
+ * @iwbl: target inode_wb_link
+ * @inode: inode @iwbl is associated with
+ *
+ * Mark that writeback is in progress for @inode on the bdi_writeback
+ * specified by @iwbl.  iwbl_test_sync() will return %true on @iwbl and
+ * %I_SYNC is set on @inode while there's any writeback in progress on it.
+ */
+static void iwbl_set_sync(struct inode_wb_link *iwbl, struct inode *inode)
+{
+	lockdep_assert_held(&inode->i_lock);
+	WARN_ON_ONCE(iwbl_test_sync(iwbl));
+
+	set_bit(IWBL_SYNC, &iwbl->data);
+	inode->i_nr_syncs++;
+	inode->i_state |= I_SYNC;
+}
+
+/**
+ * iwbl_clear_sync - undo iwbl_set_sync()
+ * @iwbl: target inode_wb_link
+ * @inode: inode @iwbl is associated with
+ *
+ * Returns %true if this was the last writeback in progress on @inode;
+ * %false, otherwise.
+ */
+static bool iwbl_clear_sync(struct inode_wb_link *iwbl, struct inode *inode)
+{
+	bool sync_complete;
+
+	lockdep_assert_held(&inode->i_lock);
+	WARN_ON_ONCE(!iwbl_test_sync(iwbl));
+
+	clear_bit(IWBL_SYNC, &iwbl->data);
+	sync_complete = !--inode->i_nr_syncs;
+	if (sync_complete)
+		inode->i_state &= ~I_SYNC;
+	return sync_complete;
+}
+
+/**
+ * iwbl_wait_for_writeback - wait for writeback in progree on an inode_wb_link
+ * @iwbl: target inode_wb_link
+ *
+ * Wait for the writeback in progress for the inode on the bdi_writeback
+ * specified by @iwbl.
+ */
+static void iwbl_wait_for_writeback(struct inode_wb_link *iwbl)
+	__releases(inode->i_lock)
+	__acquires(inode->i_lock)
+{
+	struct inode *inode = iwbl_to_inode(iwbl);
+	DEFINE_WAIT_BIT(wq, &iwbl->data, IWBL_SYNC);
+	wait_queue_head_t *wqh;
+
+	lockdep_assert_held(&inode->i_lock);
+
+	wqh = bit_waitqueue(&iwbl->data, IWBL_SYNC);
+	while (test_bit(IWBL_SYNC, &iwbl->data)) {
+		spin_unlock(&inode->i_lock);
+		__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
+		spin_lock(&inode->i_lock);
+	}
+}
+
 /*
  * Sleep until I_SYNC is cleared. This function must be called with i_lock
  * held and drops it. It is aimed for callers not holding any inode reference
  * so once i_lock is dropped, inode can go away.
  */
-static void inode_sleep_on_writeback(struct inode *inode)
-	__releases(inode->i_lock)
+static void iwbl_sleep_on_writeback(struct inode_wb_link *iwbl)
 {
 	DEFINE_WAIT(wait);
-	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
+	struct inode *inode = iwbl_to_inode(iwbl);
+	wait_queue_head_t *wqh = bit_waitqueue(&iwbl->data, IWBL_SYNC);
 	int sleep;
 
 	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
-	sleep = inode->i_state & I_SYNC;
+	sleep = test_bit(IWBL_SYNC, &iwbl->data);
 	spin_unlock(&inode->i_lock);
 	if (sleep)
 		schedule();
 	finish_wait(wqh, &wait);
 }
 
+/**
+ * iwbl_sync_wakeup - wakeup iwbl_{wait_for|sleep_on}_writeback() waiter
+ * @iwbl: target inode_wb_link
+ */
+static void iwbl_sync_wakeup(struct inode_wb_link *iwbl)
+{
+	wake_up_bit(&iwbl->data, IWBL_SYNC);
+}
+
 static inline struct inode_cgwb_link *icgwbl_first(struct inode *inode)
 {
 	struct hlist_node *node =
@@ -504,6 +591,7 @@ static void inode_icgwbls_del(struct inode *inode)
 	 * bdi->icgwbls_lock.
 	 */
 	inode_for_each_icgwbl(icgwbl, next, inode) {
+		WARN_ON_ONCE(test_bit(IWBL_SYNC, &icgwbl->iwbl.data));
 		hlist_del_rcu(&icgwbl->inode_node);
 		list_move(&icgwbl->wb_node, &to_free);
 	}
@@ -544,15 +632,39 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
 	}
 }
 
+static bool iwbl_test_sync(struct inode_wb_link *iwbl)
+{
+	struct inode *inode = iwbl_to_inode(iwbl);
+
+	return inode->i_state & I_SYNC;
+}
+
+static void iwbl_set_sync(struct inode_wb_link *iwbl, struct inode *inode)
+{
+	inode->i_state |= I_SYNC;
+}
+
+static bool iwbl_clear_sync(struct inode_wb_link *iwbl, struct inode *inode)
+{
+	inode->i_state &= ~I_SYNC;
+	return true;
+}
+
+static void iwbl_wait_for_writeback(struct inode_wb_link *iwbl)
+{
+	__inode_wait_for_writeback(iwbl_to_inode(iwbl));
+}
+
 /*
  * Sleep until I_SYNC is cleared. This function must be called with i_lock
  * held and drops it. It is aimed for callers not holding any inode reference
  * so once i_lock is dropped, inode can go away.
  */
-static void inode_sleep_on_writeback(struct inode *inode)
+static void iwbl_sleep_on_writeback(struct inode_wb_link *iwbl)
 	__releases(inode->i_lock)
 {
 	DEFINE_WAIT(wait);
+	struct inode *inode = iwbl_to_inode(iwbl);
 	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
 	int sleep;
 
@@ -564,6 +676,11 @@ static void inode_sleep_on_writeback(struct inode *inode)
 	finish_wait(wqh, &wait);
 }
 
+static void iwbl_sync_wakeup(struct inode_wb_link *iwbl)
+{
+	/* noop, __I_SYNC wakeup is enough */
+}
+
 static void inode_icgwbls_del(struct inode *inode)
 {
 }
@@ -700,14 +817,22 @@ static void requeue_io(struct inode_wb_link *iwbl, struct bdi_writeback *wb)
 	iwbl_move_locked(iwbl, wb, &wb->b_more_io);
 }
 
-static void inode_sync_complete(struct inode *inode)
+static void iwbl_sync_complete(struct inode_wb_link *iwbl)
 {
-	inode->i_state &= ~I_SYNC;
+	struct inode *inode = iwbl_to_inode(iwbl);
+	bool sync_complete;
+
+	sync_complete = iwbl_clear_sync(iwbl, inode);
 	/* If inode is clean an unused, put it into LRU now... */
-	inode_add_lru(inode);
+	if (sync_complete)
+		inode_add_lru(inode);
+
 	/* Waiters must see I_SYNC cleared before being woken up */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_SYNC);
+
+	iwbl_sync_wakeup(iwbl);
+	if (sync_complete)
+		wake_up_bit(&inode->i_state, __I_SYNC);
 }
 
 static bool iwbl_dirtied_after(struct inode_wb_link *iwbl, unsigned long t)
@@ -888,17 +1013,18 @@ static void requeue_inode(struct inode_wb_link *iwbl, struct bdi_writeback *wb,
 /*
  * Write out an inode and its dirty pages. Do not update the writeback list
  * linkage. That is left to the caller. The caller is also responsible for
- * setting I_SYNC flag and calling inode_sync_complete() to clear it.
+ * setting I_SYNC flag and calling iwbl_sync_complete() to clear it.
  */
 static int
 __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct address_space *mapping = inode->i_mapping;
+	struct inode_wb_link *iwbl = &inode->i_wb_link;
 	long nr_to_write = wbc->nr_to_write;
 	unsigned dirty;
 	int ret;
 
-	WARN_ON(!(inode->i_state & I_SYNC));
+	WARN_ON(!iwbl_test_sync(iwbl));
 
 	trace_writeback_single_inode_start(inode, wbc, nr_to_write);
 
@@ -976,7 +1102,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	else
 		WARN_ON(inode->i_state & I_WILL_FREE);
 
-	if (inode->i_state & I_SYNC) {
+	if (iwbl_test_sync(iwbl)) {
 		if (wbc->sync_mode != WB_SYNC_ALL)
 			goto out;
 		/*
@@ -984,9 +1110,9 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 		 * inode reference or inode has I_WILL_FREE set, it cannot go
 		 * away under us.
 		 */
-		__inode_wait_for_writeback(inode);
+		iwbl_wait_for_writeback(iwbl);
 	}
-	WARN_ON(inode->i_state & I_SYNC);
+	WARN_ON(iwbl_test_sync(iwbl));
 	/*
 	 * Skip inode if it is clean and we have no outstanding writeback in
 	 * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
@@ -999,7 +1125,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	    (wbc->sync_mode != WB_SYNC_ALL ||
 	     !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
 		goto out;
-	inode->i_state |= I_SYNC;
+	iwbl_set_sync(iwbl, inode);
 	spin_unlock(&inode->i_lock);
 
 	ret = __writeback_single_inode(inode, wbc);
@@ -1013,7 +1139,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	if (!(inode->i_state & I_DIRTY))
 		iwbl_del_locked(iwbl, wb);
 	spin_unlock(&wb->list_lock);
-	inode_sync_complete(inode);
+	iwbl_sync_complete(iwbl);
 out:
 	spin_unlock(&inode->i_lock);
 	return ret;
@@ -1107,7 +1233,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 			redirty_tail(iwbl, wb);
 			continue;
 		}
-		if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
+		if (iwbl_test_sync(iwbl) && wbc.sync_mode != WB_SYNC_ALL) {
 			/*
 			 * If this inode is locked for writeback and we are not
 			 * doing writeback-for-data-integrity, move it to
@@ -1129,14 +1255,14 @@ static long writeback_sb_inodes(struct super_block *sb,
 		 * are doing WB_SYNC_NONE writeback. So this catches only the
 		 * WB_SYNC_ALL case.
 		 */
-		if (inode->i_state & I_SYNC) {
+		if (iwbl_test_sync(iwbl)) {
 			/* Wait for I_SYNC. This function drops i_lock... */
-			inode_sleep_on_writeback(inode);
+			iwbl_sleep_on_writeback(iwbl);
 			/* Inode may be gone, start again */
 			spin_lock(&wb->list_lock);
 			continue;
 		}
-		inode->i_state |= I_SYNC;
+		iwbl_set_sync(iwbl, inode);
 		spin_unlock(&inode->i_lock);
 
 		write_chunk = writeback_chunk_size(wb, work);
@@ -1156,7 +1282,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 		if (!(inode->i_state & I_DIRTY))
 			wrote++;
 		requeue_inode(iwbl, wb, &wbc);
-		inode_sync_complete(inode);
+		iwbl_sync_complete(iwbl);
 		spin_unlock(&inode->i_lock);
 		cond_resched_lock(&wb->list_lock);
 		/*
@@ -1356,7 +1482,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 			spin_lock(&inode->i_lock);
 			spin_unlock(&wb->list_lock);
 			/* This function drops i_lock... */
-			inode_sleep_on_writeback(inode);
+			iwbl_sleep_on_writeback(iwbl);
 			spin_lock(&wb->list_lock);
 		}
 	}
@@ -1648,7 +1774,7 @@ void mark_inode_dirty_dctx(struct dirty_context *dctx, int flags)
 		 * The unlocker will place the inode on the appropriate
 		 * superblock list, based upon its state.
 		 */
-		if (inode->i_state & I_SYNC)
+		if (iwbl_test_sync(iwbl))
 			goto out_unlock_inode;
 
 		/*
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index e448edc..e3b18f3 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -47,8 +47,15 @@ enum wb_stat_item {
  * IWBL_* flags which occupy the lower bits of inode_wb_link->data.  The
  * upper bits point to bdi_writeback, so the number of these flags
  * determines the minimum alignment of bdi_writeback.
+ *
+ * IWBL_SYNC
+ *
+ *  Tracks whether writeback is in progress for an iwbl.  If this bit is
+ *  set for any iwbl on an inode, the inode's I_SYNC is set too.
  */
 enum {
+	IWBL_SYNC		= 0,
+
 	IWBL_FLAGS_BITS,
 	IWBL_FLAGS_MASK		= (1UL << IWBL_FLAGS_BITS) - 1,
 };
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b394821..4c22824 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -625,6 +625,9 @@ struct inode {
 #ifdef CONFIG_IMA
 	atomic_t		i_readcount; /* struct files open RO */
 #endif
+#ifdef CONFIG_CGROUP_WRITEBACK
+	unsigned int		i_nr_syncs;
+#endif
 	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
 	struct file_lock	*i_flock;
 	struct address_space	i_data;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e4db465..1399ad6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -539,6 +539,7 @@ static void cgwb_exit(struct bdi_writeback *wb)
 	spin_lock_irqsave(&wb->bdi->icgwbls_lock, flags);
 	list_for_each_entry_safe(icgwbl, next, &wb->icgwbls, wb_node) {
 		WARN_ON_ONCE(!list_empty(&icgwbl->iwbl.dirty_list));
+		WARN_ON_ONCE(test_bit(IWBL_SYNC, &icgwbl->iwbl.data));
 		hlist_del_rcu(&icgwbl->inode_node);
 		list_del(&icgwbl->wb_node);
 		kfree_rcu(icgwbl, rcu);
-- 
2.1.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk
Cc: linux-kernel@vger.kernel.org, jack@suse.cz, hch@infradead.org,
	hannes@cmpxchg.org, linux-fsdevel@vger.kernel.org,
	vgoyal@redhat.com, lizefan@huawei.com, cgroups@vger.kernel.org,
	linux-mm@kvack.org, mhocko@suse.cz, clm@fb.com,
	fengguang.wu@intel.com, david@fromorbit.com,
	Tejun Heo <tj@kernel.org>,
	Alexander Viro <viro@zeniv.linux.org.uk>
Subject: [PATCH 35/45] vfs, writeback: implement inode->i_nr_syncs
Date: Tue,  6 Jan 2015 16:26:12 -0500	[thread overview]
Message-ID: <1420579582-8516-36-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1420579582-8516-1-git-send-email-tj@kernel.org>

Currently, I_SYNC is used to keep track of whether writeback is in
progress on the inode or not.  With cgroup writeback support, multiple
writebacks could be in progress simultaneously and a single bit in
inode->i_state isn't sufficient.

If CONFIG_CGROUP_WRITEBACK, this patch makes each iwbl (inode_wb_link)
track whether writeback is in progress using the new IWBL_SYNC flag on
iwbl->data and inode->i_nr_syncs aggregate total number of writebacks
in progress on the inode.  New helpers, iwbl_{test|set|clear}_sync(),
iwbl_sync_wakeup() and __iwbl_wait_for_writeback() are added to
manipulate these states and inode_sleep_on_writeback() is converted to
iwbl_sleep_on_writeback().  I_SYNC retains the same meaning - it's set
if any writeback is in progress and cleared if none.

If !CONFIG_CGROUP_WRITEBACK, the helpers simply operate on I_SYNC
directly and there's no behavioral changes compared to before.  When
CONFIG_CGROUP_WRITEBACK, this adds an atomic_t to struct inode.  This
competes for the same left over 4 byte slot w/ i_readcount from
CONFIG_IMA on 64 bit, and, as long as CONFIG_IMA isn't enabled, it
doesn't increase the size of struct inode on 64 bit.

This allows keeping track of writeback in-progress state per cgroup
bdi_writeback.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
---
 fs/fs-writeback.c                | 172 +++++++++++++++++++++++++++++++++------
 include/linux/backing-dev-defs.h |   7 ++
 include/linux/fs.h               |   3 +
 mm/backing-dev.c                 |   1 +
 4 files changed, 160 insertions(+), 23 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d10c231..df99b5b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -430,26 +430,113 @@ restart:
 	rcu_read_unlock();
 }
 
+/**
+ * iwbl_test_sync - test whether writeback is in progress on an inode_wb_link
+ * @iwbl: target inode_wb_link
+ *
+ * Test whether writeback is in progress for the inode on the bdi_writeback
+ * specified by @iwbl.  The caller is responsible for synchornization.
+ */
+static bool iwbl_test_sync(struct inode_wb_link *iwbl)
+{
+	return test_bit(IWBL_SYNC, &iwbl->data);
+}
+
+/**
+ * iwbl_set_sync - mark an inode_wb_link that writeback is in progress
+ * @iwbl: target inode_wb_link
+ * @inode: inode @iwbl is associated with
+ *
+ * Mark that writeback is in progress for @inode on the bdi_writeback
+ * specified by @iwbl.  iwbl_test_sync() will return %true on @iwbl and
+ * %I_SYNC is set on @inode while there's any writeback in progress on it.
+ */
+static void iwbl_set_sync(struct inode_wb_link *iwbl, struct inode *inode)
+{
+	lockdep_assert_held(&inode->i_lock);
+	WARN_ON_ONCE(iwbl_test_sync(iwbl));
+
+	set_bit(IWBL_SYNC, &iwbl->data);
+	inode->i_nr_syncs++;
+	inode->i_state |= I_SYNC;
+}
+
+/**
+ * iwbl_clear_sync - undo iwbl_set_sync()
+ * @iwbl: target inode_wb_link
+ * @inode: inode @iwbl is associated with
+ *
+ * Returns %true if this was the last writeback in progress on @inode;
+ * %false, otherwise.
+ */
+static bool iwbl_clear_sync(struct inode_wb_link *iwbl, struct inode *inode)
+{
+	bool sync_complete;
+
+	lockdep_assert_held(&inode->i_lock);
+	WARN_ON_ONCE(!iwbl_test_sync(iwbl));
+
+	clear_bit(IWBL_SYNC, &iwbl->data);
+	sync_complete = !--inode->i_nr_syncs;
+	if (sync_complete)
+		inode->i_state &= ~I_SYNC;
+	return sync_complete;
+}
+
+/**
+ * iwbl_wait_for_writeback - wait for writeback in progree on an inode_wb_link
+ * @iwbl: target inode_wb_link
+ *
+ * Wait for the writeback in progress for the inode on the bdi_writeback
+ * specified by @iwbl.
+ */
+static void iwbl_wait_for_writeback(struct inode_wb_link *iwbl)
+	__releases(inode->i_lock)
+	__acquires(inode->i_lock)
+{
+	struct inode *inode = iwbl_to_inode(iwbl);
+	DEFINE_WAIT_BIT(wq, &iwbl->data, IWBL_SYNC);
+	wait_queue_head_t *wqh;
+
+	lockdep_assert_held(&inode->i_lock);
+
+	wqh = bit_waitqueue(&iwbl->data, IWBL_SYNC);
+	while (test_bit(IWBL_SYNC, &iwbl->data)) {
+		spin_unlock(&inode->i_lock);
+		__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
+		spin_lock(&inode->i_lock);
+	}
+}
+
 /*
  * Sleep until I_SYNC is cleared. This function must be called with i_lock
  * held and drops it. It is aimed for callers not holding any inode reference
  * so once i_lock is dropped, inode can go away.
  */
-static void inode_sleep_on_writeback(struct inode *inode)
-	__releases(inode->i_lock)
+static void iwbl_sleep_on_writeback(struct inode_wb_link *iwbl)
 {
 	DEFINE_WAIT(wait);
-	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
+	struct inode *inode = iwbl_to_inode(iwbl);
+	wait_queue_head_t *wqh = bit_waitqueue(&iwbl->data, IWBL_SYNC);
 	int sleep;
 
 	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
-	sleep = inode->i_state & I_SYNC;
+	sleep = test_bit(IWBL_SYNC, &iwbl->data);
 	spin_unlock(&inode->i_lock);
 	if (sleep)
 		schedule();
 	finish_wait(wqh, &wait);
 }
 
+/**
+ * iwbl_sync_wakeup - wakeup iwbl_{wait_for|sleep_on}_writeback() waiter
+ * @iwbl: target inode_wb_link
+ */
+static void iwbl_sync_wakeup(struct inode_wb_link *iwbl)
+{
+	wake_up_bit(&iwbl->data, IWBL_SYNC);
+}
+
 static inline struct inode_cgwb_link *icgwbl_first(struct inode *inode)
 {
 	struct hlist_node *node =
@@ -504,6 +591,7 @@ static void inode_icgwbls_del(struct inode *inode)
 	 * bdi->icgwbls_lock.
 	 */
 	inode_for_each_icgwbl(icgwbl, next, inode) {
+		WARN_ON_ONCE(test_bit(IWBL_SYNC, &icgwbl->iwbl.data));
 		hlist_del_rcu(&icgwbl->inode_node);
 		list_move(&icgwbl->wb_node, &to_free);
 	}
@@ -544,15 +632,39 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
 	}
 }
 
+static bool iwbl_test_sync(struct inode_wb_link *iwbl)
+{
+	struct inode *inode = iwbl_to_inode(iwbl);
+
+	return inode->i_state & I_SYNC;
+}
+
+static void iwbl_set_sync(struct inode_wb_link *iwbl, struct inode *inode)
+{
+	inode->i_state |= I_SYNC;
+}
+
+static bool iwbl_clear_sync(struct inode_wb_link *iwbl, struct inode *inode)
+{
+	inode->i_state &= ~I_SYNC;
+	return true;
+}
+
+static void iwbl_wait_for_writeback(struct inode_wb_link *iwbl)
+{
+	__inode_wait_for_writeback(iwbl_to_inode(iwbl));
+}
+
 /*
  * Sleep until I_SYNC is cleared. This function must be called with i_lock
  * held and drops it. It is aimed for callers not holding any inode reference
  * so once i_lock is dropped, inode can go away.
  */
-static void inode_sleep_on_writeback(struct inode *inode)
+static void iwbl_sleep_on_writeback(struct inode_wb_link *iwbl)
 	__releases(inode->i_lock)
 {
 	DEFINE_WAIT(wait);
+	struct inode *inode = iwbl_to_inode(iwbl);
 	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
 	int sleep;
 
@@ -564,6 +676,11 @@ static void inode_sleep_on_writeback(struct inode *inode)
 	finish_wait(wqh, &wait);
 }
 
+static void iwbl_sync_wakeup(struct inode_wb_link *iwbl)
+{
+	/* noop, __I_SYNC wakeup is enough */
+}
+
 static void inode_icgwbls_del(struct inode *inode)
 {
 }
@@ -700,14 +817,22 @@ static void requeue_io(struct inode_wb_link *iwbl, struct bdi_writeback *wb)
 	iwbl_move_locked(iwbl, wb, &wb->b_more_io);
 }
 
-static void inode_sync_complete(struct inode *inode)
+static void iwbl_sync_complete(struct inode_wb_link *iwbl)
 {
-	inode->i_state &= ~I_SYNC;
+	struct inode *inode = iwbl_to_inode(iwbl);
+	bool sync_complete;
+
+	sync_complete = iwbl_clear_sync(iwbl, inode);
 	/* If inode is clean an unused, put it into LRU now... */
-	inode_add_lru(inode);
+	if (sync_complete)
+		inode_add_lru(inode);
+
 	/* Waiters must see I_SYNC cleared before being woken up */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_SYNC);
+
+	iwbl_sync_wakeup(iwbl);
+	if (sync_complete)
+		wake_up_bit(&inode->i_state, __I_SYNC);
 }
 
 static bool iwbl_dirtied_after(struct inode_wb_link *iwbl, unsigned long t)
@@ -888,17 +1013,18 @@ static void requeue_inode(struct inode_wb_link *iwbl, struct bdi_writeback *wb,
 /*
  * Write out an inode and its dirty pages. Do not update the writeback list
  * linkage. That is left to the caller. The caller is also responsible for
- * setting I_SYNC flag and calling inode_sync_complete() to clear it.
+ * setting I_SYNC flag and calling iwbl_sync_complete() to clear it.
  */
 static int
 __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct address_space *mapping = inode->i_mapping;
+	struct inode_wb_link *iwbl = &inode->i_wb_link;
 	long nr_to_write = wbc->nr_to_write;
 	unsigned dirty;
 	int ret;
 
-	WARN_ON(!(inode->i_state & I_SYNC));
+	WARN_ON(!iwbl_test_sync(iwbl));
 
 	trace_writeback_single_inode_start(inode, wbc, nr_to_write);
 
@@ -976,7 +1102,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	else
 		WARN_ON(inode->i_state & I_WILL_FREE);
 
-	if (inode->i_state & I_SYNC) {
+	if (iwbl_test_sync(iwbl)) {
 		if (wbc->sync_mode != WB_SYNC_ALL)
 			goto out;
 		/*
@@ -984,9 +1110,9 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 		 * inode reference or inode has I_WILL_FREE set, it cannot go
 		 * away under us.
 		 */
-		__inode_wait_for_writeback(inode);
+		iwbl_wait_for_writeback(iwbl);
 	}
-	WARN_ON(inode->i_state & I_SYNC);
+	WARN_ON(iwbl_test_sync(iwbl));
 	/*
 	 * Skip inode if it is clean and we have no outstanding writeback in
 	 * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
@@ -999,7 +1125,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	    (wbc->sync_mode != WB_SYNC_ALL ||
 	     !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
 		goto out;
-	inode->i_state |= I_SYNC;
+	iwbl_set_sync(iwbl, inode);
 	spin_unlock(&inode->i_lock);
 
 	ret = __writeback_single_inode(inode, wbc);
@@ -1013,7 +1139,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	if (!(inode->i_state & I_DIRTY))
 		iwbl_del_locked(iwbl, wb);
 	spin_unlock(&wb->list_lock);
-	inode_sync_complete(inode);
+	iwbl_sync_complete(iwbl);
 out:
 	spin_unlock(&inode->i_lock);
 	return ret;
@@ -1107,7 +1233,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 			redirty_tail(iwbl, wb);
 			continue;
 		}
-		if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
+		if (iwbl_test_sync(iwbl) && wbc.sync_mode != WB_SYNC_ALL) {
 			/*
 			 * If this inode is locked for writeback and we are not
 			 * doing writeback-for-data-integrity, move it to
@@ -1129,14 +1255,14 @@ static long writeback_sb_inodes(struct super_block *sb,
 		 * are doing WB_SYNC_NONE writeback. So this catches only the
 		 * WB_SYNC_ALL case.
 		 */
-		if (inode->i_state & I_SYNC) {
+		if (iwbl_test_sync(iwbl)) {
 			/* Wait for I_SYNC. This function drops i_lock... */
-			inode_sleep_on_writeback(inode);
+			iwbl_sleep_on_writeback(iwbl);
 			/* Inode may be gone, start again */
 			spin_lock(&wb->list_lock);
 			continue;
 		}
-		inode->i_state |= I_SYNC;
+		iwbl_set_sync(iwbl, inode);
 		spin_unlock(&inode->i_lock);
 
 		write_chunk = writeback_chunk_size(wb, work);
@@ -1156,7 +1282,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 		if (!(inode->i_state & I_DIRTY))
 			wrote++;
 		requeue_inode(iwbl, wb, &wbc);
-		inode_sync_complete(inode);
+		iwbl_sync_complete(iwbl);
 		spin_unlock(&inode->i_lock);
 		cond_resched_lock(&wb->list_lock);
 		/*
@@ -1356,7 +1482,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 			spin_lock(&inode->i_lock);
 			spin_unlock(&wb->list_lock);
 			/* This function drops i_lock... */
-			inode_sleep_on_writeback(inode);
+			iwbl_sleep_on_writeback(iwbl);
 			spin_lock(&wb->list_lock);
 		}
 	}
@@ -1648,7 +1774,7 @@ void mark_inode_dirty_dctx(struct dirty_context *dctx, int flags)
 		 * The unlocker will place the inode on the appropriate
 		 * superblock list, based upon its state.
 		 */
-		if (inode->i_state & I_SYNC)
+		if (iwbl_test_sync(iwbl))
 			goto out_unlock_inode;
 
 		/*
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index e448edc..e3b18f3 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -47,8 +47,15 @@ enum wb_stat_item {
  * IWBL_* flags which occupy the lower bits of inode_wb_link->data.  The
  * upper bits point to bdi_writeback, so the number of these flags
  * determines the minimum alignment of bdi_writeback.
+ *
+ * IWBL_SYNC
+ *
+ *  Tracks whether writeback is in progress for an iwbl.  If this bit is
+ *  set for any iwbl on an inode, the inode's I_SYNC is set too.
  */
 enum {
+	IWBL_SYNC		= 0,
+
 	IWBL_FLAGS_BITS,
 	IWBL_FLAGS_MASK		= (1UL << IWBL_FLAGS_BITS) - 1,
 };
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b394821..4c22824 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -625,6 +625,9 @@ struct inode {
 #ifdef CONFIG_IMA
 	atomic_t		i_readcount; /* struct files open RO */
 #endif
+#ifdef CONFIG_CGROUP_WRITEBACK
+	unsigned int		i_nr_syncs;
+#endif
 	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
 	struct file_lock	*i_flock;
 	struct address_space	i_data;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e4db465..1399ad6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -539,6 +539,7 @@ static void cgwb_exit(struct bdi_writeback *wb)
 	spin_lock_irqsave(&wb->bdi->icgwbls_lock, flags);
 	list_for_each_entry_safe(icgwbl, next, &wb->icgwbls, wb_node) {
 		WARN_ON_ONCE(!list_empty(&icgwbl->iwbl.dirty_list));
+		WARN_ON_ONCE(test_bit(IWBL_SYNC, &icgwbl->iwbl.data));
 		hlist_del_rcu(&icgwbl->inode_node);
 		list_del(&icgwbl->wb_node);
 		kfree_rcu(icgwbl, rcu);
-- 
2.1.0


  parent reply	other threads:[~2015-01-06 21:26 UTC|newest]

Thread overview: 110+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-06 21:25 [PATCHSET RFC block/for-next] writeback: cgroup writeback support Tejun Heo
2015-01-06 21:25 ` Tejun Heo
2015-01-06 21:25 ` [PATCH 01/45] writeback: add struct dirty_context Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 02/45] writeback: add {CONFIG|BDI_CAP|FS}_CGROUP_WRITEBACK Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 03/45] memcg: encode page_cgflags in the lower bits of page->mem_cgroup Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 04/45] memcg, writeback: implement memcg_blkcg_ptr Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 05/45] writeback: make backing_dev_info host cgroup-specific bdi_writebacks Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 06/45] writeback, blkcg: associate each blkcg_gq with the corresponding bdi_writeback Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 07/45] writeback: attribute stats to the matching per-cgroup bdi_writeback Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 08/45] writeback: let balance_dirty_pages() work on the matching cgroup bdi_writeback Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 09/45] writeback: make congestion functions per bdi_writeback Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 10/45] writeback, blkcg: restructure blk_{set|clear}_queue_congested() Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 11/45] writeback, blkcg: propagate non-root blkcg congestion state Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 12/45] writeback: implement and use mapping_congested() Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 13/45] writeback: implement WB_has_dirty_io wb_state flag Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 14/45] writeback: implement backing_dev_info->tot_write_bandwidth Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 15/45] writeback: make bdi_has_dirty_io() take multiple bdi_writeback's into account Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 16/45] writeback: don't issue wb_writeback_work if clean Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 17/45] writeback: make bdi->min/max_ratio handling cgroup writeback aware Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 18/45] writeback: implement bdi_for_each_wb() Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 19/45] writeback: remove bdi_start_writeback() Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 20/45] writeback: make laptop_mode_timer_fn() handle multiple bdi_writeback's Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:25 ` [PATCH 22/45] writeback: make bdi_start_background_writeback() take bdi_writeback instead of backing_dev_info Tejun Heo
2015-01-06 21:25   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 23/45] writeback: make wakeup_flusher_threads() handle multiple bdi_writeback's Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 24/45] writeback: add wb_writeback_work->auto_free Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 25/45] writeback: implement bdi_wait_for_completion() Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 26/45] writeback: implement wb_wait_for_single_work() Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 27/45] writeback: restructure try_writeback_inodes_sb[_nr]() Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 28/45] writeback: make writeback initiation functions handle multiple bdi_writeback's Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 29/45] writeback: move i_wb_list emptiness test into inode_wb_list_del() from its caller Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 30/45] vfs, writeback: introduce struct inode_wb_link Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 31/45] vfs, writeback: add inode_wb_link->data point to the associated bdi_writeback Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 32/45] vfs, writeback: move inode->dirtied_when into inode->i_wb_link Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 33/45] writeback: minor reorganization of fs/fs-writeback.c Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 34/45] vfs, writeback: implement support for multiple inode_wb_link's Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` Tejun Heo [this message]
2015-01-06 21:26   ` [PATCH 35/45] vfs, writeback: implement inode->i_nr_syncs Tejun Heo
2015-01-06 21:26 ` [PATCH 36/45] writeback: dirty inodes against their matching cgroup bdi_writeback's Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 37/45] writeback: make writeback_control carry the inode_wb_link being served Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 38/45] writeback: make cyclic writeback cursor cgroup writeback aware Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 39/45] writeback: make DIRTY_PAGES tracking " Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 40/45] writeback: make write_cache_pages() " Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 41/45] writeback: make __writeback_single_inode() " Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 42/45] writeback: make __filemap_fdatawrite_range() croup " Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:26 ` [PATCH 43/45] buffer, writeback: make __block_write_full_page() honor cgroup writeback Tejun Heo
2015-01-06 21:26   ` Tejun Heo
     [not found] ` <1420579582-8516-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2015-01-06 21:25   ` [PATCH 21/45] writeback: make writeback_in_progress() take bdi_writeback instead of backing_dev_info Tejun Heo
2015-01-06 21:25     ` Tejun Heo
2015-01-06 21:25     ` Tejun Heo
2015-01-06 21:26   ` [PATCH 44/45] mpage: make __mpage_writepage() honor cgroup writeback Tejun Heo
2015-01-06 21:26     ` Tejun Heo
2015-01-06 21:26     ` Tejun Heo
2015-01-06 21:26 ` [PATCH 45/45] ext2: enable cgroup writeback support Tejun Heo
2015-01-06 21:26   ` Tejun Heo
2015-01-06 21:44 ` [PATCHSET RFC block/for-next] writeback: " Tejun Heo
2015-01-06 21:44   ` Tejun Heo
2015-01-07 23:45   ` Dave Chinner
2015-01-07 23:45     ` Dave Chinner
2015-01-09 21:23     ` Tejun Heo
2015-01-09 21:23       ` Tejun Heo
2015-01-10  0:38       ` Dave Chinner
2015-01-10  0:38         ` Dave Chinner
2015-01-10 15:56         ` Tejun Heo
2015-01-10 15:56           ` Tejun Heo
2015-01-10 16:05           ` Tejun Heo
2015-01-10 16:05             ` Tejun Heo
2015-01-08  9:30 ` Jan Kara
2015-01-08  9:30   ` Jan Kara
2015-01-09 21:36   ` Tejun Heo
2015-01-09 21:36     ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1420579582-8516-36-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=cgroups@vger.kernel.org \
    --cc=clm@fb.com \
    --cc=david@fromorbit.com \
    --cc=fengguang.wu@intel.com \
    --cc=hannes@cmpxchg.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizefan@huawei.com \
    --cc=mhocko@suse.cz \
    --cc=vgoyal@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.