From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk
Cc: linux-kernel@vger.kernel.org, jack@suse.cz,
Tejun Heo <tj@kernel.org>, Wu Fengguang <fengguang.wu@intel.com>
Subject: [PATCH 04/10] writeback: move backing_dev_info->wb_lock and ->worklist into bdi_writeback
Date: Tue, 18 Nov 2014 03:37:22 -0500 [thread overview]
Message-ID: <1416299848-22112-5-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1416299848-22112-1-git-send-email-tj@kernel.org>
Currently, a bdi (backing_dev_info) embeds single wb (bdi_writeback)
and the role of the separation is unclear. For cgroup support for
writeback IOs, a bdi will be updated to host multiple wb's where each
wb serves writeback IOs of a different cgroup on the bdi. To achieve
that, a wb should carry all states necessary for servicing writeback
IOs for a cgroup independently.
This patch moves bdi->wb_lock and ->worklist into wb.
* The lock protects bdi->worklist and bdi->wb.dwork scheduling. While
moving, rename it to wb->work_lock as wb->wb_lock is confusing.
Also, move wb->dwork downwards so that it's colocated with the new
->work_lock and ->work_list fields.
* bdi_writeback_workfn() -> wb_workfn()
bdi_wakeup_thread_delayed(bdi) -> wb_wakeup_delayed(wb)
bdi_wakeup_thread(bdi) -> wb_wakeup(wb)
bdi_queue_work(bdi, ...) -> wb_queue_work(wb, ...)
__bdi_start_writeback(bdi, ...) -> __wb_start_writeback(wb, ...)
get_next_work_item(bdi) -> get_next_work_item(wb)
* bdi_wb_shutdown() is renamed to wb_shutdown() and now takes @wb.
The function contained parts which belong to the containing bdi
rather than the wb itself - testing cap_writeback_dirty and
bdi_remove_from_list() invocation. Those are moved to
bdi_unregister().
* bdi_wb_{init|exit}() are renamed to wb_{init|exit}().
Initializations of the moved bdi->wb_lock and ->work_list are
relocated from bdi_init() to wb_init().
* As there's still only one bdi_writeback per backing_dev_info, all
uses of bdi->state are mechanically replaced with bdi->wb.state
introducing no behavior changes.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
---
fs/fs-writeback.c | 84 +++++++++++++++++++++------------------------
include/linux/backing-dev.h | 12 +++----
mm/backing-dev.c | 64 +++++++++++++++++-----------------
3 files changed, 77 insertions(+), 83 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index daa91ae..41c9f1e 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -91,34 +91,33 @@ static inline struct inode *wb_inode(struct list_head *head)
EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);
-static void bdi_wakeup_thread(struct backing_dev_info *bdi)
+static void wb_wakeup(struct bdi_writeback *wb)
{
- spin_lock_bh(&bdi->wb_lock);
- if (test_bit(WB_registered, &bdi->wb.state))
- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
- spin_unlock_bh(&bdi->wb_lock);
+ spin_lock_bh(&wb->work_lock);
+ if (test_bit(WB_registered, &wb->state))
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ spin_unlock_bh(&wb->work_lock);
}
-static void bdi_queue_work(struct backing_dev_info *bdi,
- struct wb_writeback_work *work)
+static void wb_queue_work(struct bdi_writeback *wb,
+ struct wb_writeback_work *work)
{
- trace_writeback_queue(bdi, work);
+ trace_writeback_queue(wb->bdi, work);
- spin_lock_bh(&bdi->wb_lock);
- if (!test_bit(WB_registered, &bdi->wb.state)) {
+ spin_lock_bh(&wb->work_lock);
+ if (!test_bit(WB_registered, &wb->state)) {
if (work->done)
complete(work->done);
goto out_unlock;
}
- list_add_tail(&work->list, &bdi->work_list);
- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+ list_add_tail(&work->list, &wb->work_list);
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
out_unlock:
- spin_unlock_bh(&bdi->wb_lock);
+ spin_unlock_bh(&wb->work_lock);
}
-static void
-__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
- bool range_cyclic, enum wb_reason reason)
+static void __wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
+ bool range_cyclic, enum wb_reason reason)
{
struct wb_writeback_work *work;
@@ -128,8 +127,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
*/
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
- trace_writeback_nowork(bdi);
- bdi_wakeup_thread(bdi);
+ trace_writeback_nowork(wb->bdi);
+ wb_wakeup(wb);
return;
}
@@ -138,7 +137,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
work->range_cyclic = range_cyclic;
work->reason = reason;
- bdi_queue_work(bdi, work);
+ wb_queue_work(wb, work);
}
/**
@@ -156,7 +155,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
enum wb_reason reason)
{
- __bdi_start_writeback(bdi, nr_pages, true, reason);
+ __wb_start_writeback(&bdi->wb, nr_pages, true, reason);
}
/**
@@ -176,7 +175,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
* writeback as soon as there is no other work to do.
*/
trace_writeback_wake_background(bdi);
- bdi_wakeup_thread(bdi);
+ wb_wakeup(&bdi->wb);
}
/*
@@ -848,7 +847,7 @@ static long wb_writeback(struct bdi_writeback *wb,
* after the other works are all done.
*/
if ((work->for_background || work->for_kupdate) &&
- !list_empty(&wb->bdi->work_list))
+ !list_empty(&wb->work_list))
break;
/*
@@ -919,18 +918,17 @@ static long wb_writeback(struct bdi_writeback *wb,
/*
* Return the next wb_writeback_work struct that hasn't been processed yet.
*/
-static struct wb_writeback_work *
-get_next_work_item(struct backing_dev_info *bdi)
+static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
{
struct wb_writeback_work *work = NULL;
- spin_lock_bh(&bdi->wb_lock);
- if (!list_empty(&bdi->work_list)) {
- work = list_entry(bdi->work_list.next,
+ spin_lock_bh(&wb->work_lock);
+ if (!list_empty(&wb->work_list)) {
+ work = list_entry(wb->work_list.next,
struct wb_writeback_work, list);
list_del_init(&work->list);
}
- spin_unlock_bh(&bdi->wb_lock);
+ spin_unlock_bh(&wb->work_lock);
return work;
}
@@ -1002,14 +1000,13 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
*/
static long wb_do_writeback(struct bdi_writeback *wb)
{
- struct backing_dev_info *bdi = wb->bdi;
struct wb_writeback_work *work;
long wrote = 0;
set_bit(WB_writeback_running, &wb->state);
- while ((work = get_next_work_item(bdi)) != NULL) {
+ while ((work = get_next_work_item(wb)) != NULL) {
- trace_writeback_exec(bdi, work);
+ trace_writeback_exec(wb->bdi, work);
wrote += wb_writeback(wb, work);
@@ -1037,43 +1034,42 @@ static long wb_do_writeback(struct bdi_writeback *wb)
* Handle writeback of dirty data for the device backed by this bdi. Also
* reschedules periodically and does kupdated style flushing.
*/
-void bdi_writeback_workfn(struct work_struct *work)
+void wb_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(to_delayed_work(work),
struct bdi_writeback, dwork);
- struct backing_dev_info *bdi = wb->bdi;
long pages_written;
- set_worker_desc("flush-%s", dev_name(bdi->dev));
+ set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() ||
!test_bit(WB_registered, &wb->state))) {
/*
- * The normal path. Keep writing back @bdi until its
+ * The normal path. Keep writing back @wb until its
* work_list is empty. Note that this path is also taken
- * if @bdi is shutting down even when we're running off the
+ * if @wb is shutting down even when we're running off the
* rescuer as work_list needs to be drained.
*/
do {
pages_written = wb_do_writeback(wb);
trace_writeback_pages_written(pages_written);
- } while (!list_empty(&bdi->work_list));
+ } while (!list_empty(&wb->work_list));
} else {
/*
* bdi_wq can't get enough workers and we're running off
* the emergency worker. Don't hog it. Hopefully, 1024 is
* enough for efficient IO.
*/
- pages_written = writeback_inodes_wb(&bdi->wb, 1024,
+ pages_written = writeback_inodes_wb(wb, 1024,
WB_REASON_FORKER_THREAD);
trace_writeback_pages_written(pages_written);
}
- if (!list_empty(&bdi->work_list))
+ if (!list_empty(&wb->work_list))
mod_delayed_work(bdi_wq, &wb->dwork, 0);
else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
- bdi_wakeup_thread_delayed(bdi);
+ wb_wakeup_delayed(wb);
current->flags &= ~PF_SWAPWRITE;
}
@@ -1093,7 +1089,7 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
- __bdi_start_writeback(bdi, nr_pages, false, reason);
+ __wb_start_writeback(&bdi->wb, nr_pages, false, reason);
}
rcu_read_unlock();
}
@@ -1228,7 +1224,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
spin_unlock(&bdi->wb.list_lock);
if (wakeup_bdi)
- bdi_wakeup_thread_delayed(bdi);
+ wb_wakeup_delayed(&bdi->wb);
return;
}
}
@@ -1318,7 +1314,7 @@ void writeback_inodes_sb_nr(struct super_block *sb,
if (sb->s_bdi == &noop_backing_dev_info)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- bdi_queue_work(sb->s_bdi, &work);
+ wb_queue_work(&sb->s_bdi->wb, &work);
wait_for_completion(&done);
}
EXPORT_SYMBOL(writeback_inodes_sb_nr);
@@ -1402,7 +1398,7 @@ void sync_inodes_sb(struct super_block *sb)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- bdi_queue_work(sb->s_bdi, &work);
+ wb_queue_work(&sb->s_bdi->wb, &work);
wait_for_completion(&done);
wait_sb_inodes(sb);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index a077a8d..6aba0d3 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -52,7 +52,6 @@ struct bdi_writeback {
unsigned long state; /* Always use atomic bitops on this */
unsigned long last_old_flush; /* last old data flush */
- struct delayed_work dwork; /* work item used for writeback */
struct list_head b_dirty; /* dirty inodes */
struct list_head b_io; /* parked for writeback */
struct list_head b_more_io; /* parked for more writeback */
@@ -77,6 +76,10 @@ struct bdi_writeback {
struct fprop_local_percpu completions;
int dirty_exceeded;
+
+ spinlock_t work_lock; /* protects work_list & dwork scheduling */
+ struct list_head work_list;
+ struct delayed_work dwork; /* work item used for writeback */
};
struct backing_dev_info {
@@ -92,9 +95,6 @@ struct backing_dev_info {
unsigned int max_ratio, max_prop_frac;
struct bdi_writeback wb; /* default writeback info for this bdi */
- spinlock_t wb_lock; /* protects work_list & wb.dwork scheduling */
-
- struct list_head work_list;
struct device *dev;
@@ -118,9 +118,9 @@ int __must_check bdi_setup_and_register(struct backing_dev_info *, char *, unsig
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
enum wb_reason reason);
void bdi_start_background_writeback(struct backing_dev_info *bdi);
-void bdi_writeback_workfn(struct work_struct *work);
+void wb_workfn(struct work_struct *work);
int bdi_has_dirty_io(struct backing_dev_info *bdi);
-void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
+void wb_wakeup_delayed(struct bdi_writeback *wb);
extern spinlock_t bdi_lock;
extern struct list_head bdi_list;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7b9b10e..4904456 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -278,7 +278,7 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
}
/*
- * This function is used when the first inode for this bdi is marked dirty. It
+ * This function is used when the first inode for this wb is marked dirty. It
* wakes-up the corresponding bdi thread which should then take care of the
* periodic background write-out of dirty inodes. Since the write-out would
* starts only 'dirty_writeback_interval' centisecs from now anyway, we just
@@ -291,15 +291,15 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
* We have to be careful not to postpone flush work if it is scheduled for
* earlier. Thus we use queue_delayed_work().
*/
-void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
+void wb_wakeup_delayed(struct bdi_writeback *wb)
{
unsigned long timeout;
timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
- spin_lock_bh(&bdi->wb_lock);
- if (test_bit(WB_registered, &bdi->wb.state))
- queue_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
- spin_unlock_bh(&bdi->wb_lock);
+ spin_lock_bh(&wb->work_lock);
+ if (test_bit(WB_registered, &wb->state))
+ queue_delayed_work(bdi_wq, &wb->dwork, timeout);
+ spin_unlock_bh(&wb->work_lock);
}
/*
@@ -352,30 +352,22 @@ EXPORT_SYMBOL(bdi_register_dev);
/*
* Remove bdi from the global list and shutdown any threads we have running
*/
-static void bdi_wb_shutdown(struct backing_dev_info *bdi)
+static void wb_shutdown(struct bdi_writeback *wb)
{
- if (!bdi_cap_writeback_dirty(bdi))
- return;
-
- /*
- * Make sure nobody finds us on the bdi_list anymore
- */
- bdi_remove_from_list(bdi);
-
/* Make sure nobody queues further work */
- spin_lock_bh(&bdi->wb_lock);
- clear_bit(WB_registered, &bdi->wb.state);
- spin_unlock_bh(&bdi->wb_lock);
+ spin_lock_bh(&wb->work_lock);
+ clear_bit(WB_registered, &wb->state);
+ spin_unlock_bh(&wb->work_lock);
/*
- * Drain work list and shutdown the delayed_work. At this point,
- * @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi
- * is dying and its work_list needs to be drained no matter what.
+ * Drain work list and shutdown the delayed_work. !WB_registered
+ * tells wb_workfn() that @wb is dying and its work_list needs to
+ * be drained no matter what.
*/
- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
- flush_delayed_work(&bdi->wb.dwork);
- WARN_ON(!list_empty(&bdi->work_list));
- WARN_ON(delayed_work_pending(&bdi->wb.dwork));
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ flush_delayed_work(&wb->dwork);
+ WARN_ON(!list_empty(&wb->work_list));
+ WARN_ON(delayed_work_pending(&wb->dwork));
}
/*
@@ -400,7 +392,12 @@ void bdi_unregister(struct backing_dev_info *bdi)
trace_writeback_bdi_unregister(bdi);
bdi_prune_sb(bdi);
- bdi_wb_shutdown(bdi);
+ if (bdi_cap_writeback_dirty(bdi)) {
+ /* make sure nobody finds us on the bdi_list anymore */
+ bdi_remove_from_list(bdi);
+ wb_shutdown(&bdi->wb);
+ }
+
bdi_debug_unregister(bdi);
device_unregister(bdi->dev);
bdi->dev = NULL;
@@ -413,7 +410,7 @@ EXPORT_SYMBOL(bdi_unregister);
*/
#define INIT_BW (100 << (20 - PAGE_SHIFT))
-static int bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
+static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
{
int i, err;
@@ -425,7 +422,6 @@ static int bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
INIT_LIST_HEAD(&wb->b_io);
INIT_LIST_HEAD(&wb->b_more_io);
spin_lock_init(&wb->list_lock);
- INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
wb->bw_time_stamp = jiffies;
wb->balanced_dirty_ratelimit = INIT_BW;
@@ -433,6 +429,10 @@ static int bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
wb->write_bandwidth = INIT_BW;
wb->avg_write_bandwidth = INIT_BW;
+ spin_lock_init(&wb->work_lock);
+ INIT_LIST_HEAD(&wb->work_list);
+ INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+
err = fprop_local_init_percpu(&wb->completions, GFP_KERNEL);
if (err)
return err;
@@ -450,7 +450,7 @@ static int bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
return 0;
}
-static void bdi_wb_exit(struct bdi_writeback *wb)
+static void wb_exit(struct bdi_writeback *wb)
{
int i;
@@ -471,11 +471,9 @@ int bdi_init(struct backing_dev_info *bdi)
bdi->min_ratio = 0;
bdi->max_ratio = 100;
bdi->max_prop_frac = FPROP_FRAC_BASE;
- spin_lock_init(&bdi->wb_lock);
INIT_LIST_HEAD(&bdi->bdi_list);
- INIT_LIST_HEAD(&bdi->work_list);
- err = bdi_wb_init(&bdi->wb, bdi);
+ err = wb_init(&bdi->wb, bdi);
if (err)
return err;
@@ -510,7 +508,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
}
bdi_unregister(bdi);
- bdi_wb_exit(&bdi->wb);
+ wb_exit(&bdi->wb);
}
EXPORT_SYMBOL(bdi_destroy);
--
1.9.3
next prev parent reply other threads:[~2014-11-18 8:40 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-11-18 8:37 [PATCHSET block/for-next] writeback: prepare for cgroup writeback support Tejun Heo
2014-11-18 8:37 ` [Drbd-dev] [PATCH 01/10] writeback: move backing_dev_info->state into bdi_writeback Tejun Heo
2014-11-18 8:37 ` Tejun Heo
2014-11-20 15:27 ` [Drbd-dev] " Jan Kara
2014-11-20 15:27 ` Jan Kara
2014-11-20 15:38 ` [Drbd-dev] " Tejun Heo
2014-11-20 15:38 ` Tejun Heo
2014-11-25 1:20 ` [Drbd-dev] " NeilBrown
2014-11-25 1:20 ` NeilBrown
2014-11-18 8:37 ` [PATCH 02/10] writeback: move backing_dev_info->bdi_stat[] " Tejun Heo
2014-11-20 15:31 ` Jan Kara
2014-11-18 8:37 ` [PATCH 03/10] writeback: move bandwidth related fields from backing_dev_info " Tejun Heo
2014-11-20 15:52 ` Jan Kara
2014-11-20 16:01 ` Tejun Heo
2014-11-18 8:37 ` Tejun Heo [this message]
2014-11-20 16:01 ` [PATCH 04/10] writeback: move backing_dev_info->wb_lock and ->worklist " Jan Kara
2014-11-20 16:02 ` Tejun Heo
2014-11-20 16:10 ` Jan Kara
2014-11-18 8:37 ` [PATCH 05/10] writeback: move lingering dirty IO lists transfer from bdi_destroy() to wb_exit() Tejun Heo
2014-11-18 8:37 ` [PATCH 06/10] writeback: reorganize mm/backing-dev.c Tejun Heo
2014-11-18 8:37 ` [PATCH 07/10] writeback: separate out include/linux/backing-dev-defs.h Tejun Heo
2014-11-18 8:37 ` [PATCH 08/10] writeback: cosmetic change in account_page_dirtied() Tejun Heo
2014-11-18 8:37 ` [PATCH 09/10] writeback: add @gfp to wb_init() Tejun Heo
2014-11-18 8:37 ` [PATCH 10/10] writeback: move inode_to_bdi() to include/linux/backing-dev.h Tejun Heo
2014-11-20 15:13 ` [PATCHSET block/for-next] writeback: prepare for cgroup writeback support Jan Kara
2014-11-20 15:14 ` Tejun Heo
2014-11-20 15:44 ` Christoph Hellwig
2014-11-20 16:21 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1416299848-22112-5-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=axboe@kernel.dk \
--cc=fengguang.wu@intel.com \
--cc=jack@suse.cz \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.