From: Wu Fengguang <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>, Jens Axboe <axboe@kernel.dk>,
Chris Mason <chris.mason@oracle.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Trond Myklebust <Trond.Myklebust@netapp.com>,
Wu Fengguang <fengguang.wu@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Andrea Righi <arighi@develer.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: linux-mm <linux-mm@kvack.org>
Cc: <linux-fsdevel@vger.kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 09/27] nfs: writeback pages wait queue
Date: Thu, 03 Mar 2011 14:45:14 +0800 [thread overview]
Message-ID: <20110303074949.809203319@intel.com> (raw)
In-Reply-To: 20110303064505.718671603@intel.com
[-- Attachment #1: writeback-nfs-request-queue.patch --]
[-- Type: text/plain, Size: 5854 bytes --]
The generic writeback routines are departing from congestion_wait()
in preference of get_request_wait(), aka. waiting on the block queues.
Introduce the missing writeback wait queue for NFS, otherwise its
writeback pages will grow out of control, exhausting all PG_dirty pages.
CC: Jens Axboe <axboe@kernel.dk>
CC: Chris Mason <chris.mason@oracle.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
fs/nfs/client.c | 2
fs/nfs/write.c | 93 +++++++++++++++++++++++++++++++-----
include/linux/nfs_fs_sb.h | 1
3 files changed, 85 insertions(+), 11 deletions(-)
--- linux-next.orig/fs/nfs/write.c 2011-03-03 14:03:40.000000000 +0800
+++ linux-next/fs/nfs/write.c 2011-03-03 14:03:40.000000000 +0800
@@ -185,11 +185,68 @@ static int wb_priority(struct writeback_
* NFS congestion control
*/
+#define NFS_WAIT_PAGES (1024L >> (PAGE_SHIFT - 10))
int nfs_congestion_kb;
-#define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10))
-#define NFS_CONGESTION_OFF_THRESH \
- (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
+/*
+ * SYNC requests will block on (2*limit) and wakeup on (2*limit-NFS_WAIT_PAGES)
+ * ASYNC requests will block on (limit) and wakeup on (limit - NFS_WAIT_PAGES)
+ * In this way SYNC writes will never be blocked by ASYNC ones.
+ */
+
+static void nfs_set_congested(long nr, struct backing_dev_info *bdi)
+{
+ long limit = nfs_congestion_kb >> (PAGE_SHIFT - 10);
+
+ if (nr > limit && !test_bit(BDI_async_congested, &bdi->state))
+ set_bdi_congested(bdi, BLK_RW_ASYNC);
+ else if (nr > 2 * limit && !test_bit(BDI_sync_congested, &bdi->state))
+ set_bdi_congested(bdi, BLK_RW_SYNC);
+}
+
+static void nfs_wait_contested(int is_sync,
+ struct backing_dev_info *bdi,
+ wait_queue_head_t *wqh)
+{
+ int waitbit = is_sync ? BDI_sync_congested : BDI_async_congested;
+ DEFINE_WAIT(wait);
+
+ if (!test_bit(waitbit, &bdi->state))
+ return;
+
+ for (;;) {
+ prepare_to_wait(&wqh[is_sync], &wait, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(waitbit, &bdi->state))
+ break;
+
+ io_schedule();
+ }
+ finish_wait(&wqh[is_sync], &wait);
+}
+
+static void nfs_wakeup_congested(long nr,
+ struct backing_dev_info *bdi,
+ wait_queue_head_t *wqh)
+{
+ long limit = nfs_congestion_kb >> (PAGE_SHIFT - 10);
+
+ if (nr < 2 * limit - min(limit / 8, NFS_WAIT_PAGES)) {
+ if (test_bit(BDI_sync_congested, &bdi->state)) {
+ clear_bdi_congested(bdi, BLK_RW_SYNC);
+ smp_mb__after_clear_bit();
+ }
+ if (waitqueue_active(&wqh[BLK_RW_SYNC]))
+ wake_up(&wqh[BLK_RW_SYNC]);
+ }
+ if (nr < limit - min(limit / 8, NFS_WAIT_PAGES)) {
+ if (test_bit(BDI_async_congested, &bdi->state)) {
+ clear_bdi_congested(bdi, BLK_RW_ASYNC);
+ smp_mb__after_clear_bit();
+ }
+ if (waitqueue_active(&wqh[BLK_RW_ASYNC]))
+ wake_up(&wqh[BLK_RW_ASYNC]);
+ }
+}
static int nfs_set_page_writeback(struct page *page)
{
@@ -200,11 +257,8 @@ static int nfs_set_page_writeback(struct
struct nfs_server *nfss = NFS_SERVER(inode);
page_cache_get(page);
- if (atomic_long_inc_return(&nfss->writeback) >
- NFS_CONGESTION_ON_THRESH) {
- set_bdi_congested(&nfss->backing_dev_info,
- BLK_RW_ASYNC);
- }
+ nfs_set_congested(atomic_long_inc_return(&nfss->writeback),
+ &nfss->backing_dev_info);
}
return ret;
}
@@ -216,8 +270,10 @@ static void nfs_end_page_writeback(struc
end_page_writeback(page);
page_cache_release(page);
- if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
- clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
+
+ nfs_wakeup_congested(atomic_long_dec_return(&nfss->writeback),
+ &nfss->backing_dev_info,
+ nfss->writeback_wait);
}
static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
@@ -318,19 +374,34 @@ static int nfs_writepage_locked(struct p
int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
+ struct inode *inode = page->mapping->host;
+ struct nfs_server *nfss = NFS_SERVER(inode);
int ret;
ret = nfs_writepage_locked(page, wbc);
unlock_page(page);
+
+ nfs_wait_contested(wbc->sync_mode == WB_SYNC_ALL,
+ &nfss->backing_dev_info,
+ nfss->writeback_wait);
+
return ret;
}
-static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
+static int nfs_writepages_callback(struct page *page,
+ struct writeback_control *wbc, void *data)
{
+ struct inode *inode = page->mapping->host;
+ struct nfs_server *nfss = NFS_SERVER(inode);
int ret;
ret = nfs_do_writepage(page, wbc, data);
unlock_page(page);
+
+ nfs_wait_contested(wbc->sync_mode == WB_SYNC_ALL,
+ &nfss->backing_dev_info,
+ nfss->writeback_wait);
+
return ret;
}
--- linux-next.orig/include/linux/nfs_fs_sb.h 2011-03-03 14:03:38.000000000 +0800
+++ linux-next/include/linux/nfs_fs_sb.h 2011-03-03 14:03:40.000000000 +0800
@@ -102,6 +102,7 @@ struct nfs_server {
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
struct backing_dev_info backing_dev_info;
atomic_long_t writeback; /* number of writeback pages */
+ wait_queue_head_t writeback_wait[2];
int flags; /* various flags */
unsigned int caps; /* server capabilities */
unsigned int rsize; /* read size */
--- linux-next.orig/fs/nfs/client.c 2011-03-03 14:03:38.000000000 +0800
+++ linux-next/fs/nfs/client.c 2011-03-03 14:03:40.000000000 +0800
@@ -1042,6 +1042,8 @@ static struct nfs_server *nfs_alloc_serv
INIT_LIST_HEAD(&server->delegations);
atomic_set(&server->active, 0);
+ init_waitqueue_head(&server->writeback_wait[BLK_RW_SYNC]);
+ init_waitqueue_head(&server->writeback_wait[BLK_RW_ASYNC]);
server->io_stats = nfs_alloc_iostats();
if (!server->io_stats) {
WARNING: multiple messages have this Message-ID (diff)
From: Wu Fengguang <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>, Jens Axboe <axboe@kernel.dk>,
Chris Mason <chris.mason@oracle.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Trond Myklebust <Trond.Myklebust@netapp.com>,
Wu Fengguang <fengguang.wu@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Andrea Righi <arighi@develer.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: linux-mm <linux-mm@kvack.org>
Cc: <linux-fsdevel@vger.kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 09/27] nfs: writeback pages wait queue
Date: Thu, 03 Mar 2011 14:45:14 +0800 [thread overview]
Message-ID: <20110303074949.809203319@intel.com> (raw)
In-Reply-To: 20110303064505.718671603@intel.com
[-- Attachment #1: writeback-nfs-request-queue.patch --]
[-- Type: text/plain, Size: 6157 bytes --]
The generic writeback routines are departing from congestion_wait()
in preference of get_request_wait(), aka. waiting on the block queues.
Introduce the missing writeback wait queue for NFS, otherwise its
writeback pages will grow out of control, exhausting all PG_dirty pages.
CC: Jens Axboe <axboe@kernel.dk>
CC: Chris Mason <chris.mason@oracle.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
fs/nfs/client.c | 2
fs/nfs/write.c | 93 +++++++++++++++++++++++++++++++-----
include/linux/nfs_fs_sb.h | 1
3 files changed, 85 insertions(+), 11 deletions(-)
--- linux-next.orig/fs/nfs/write.c 2011-03-03 14:03:40.000000000 +0800
+++ linux-next/fs/nfs/write.c 2011-03-03 14:03:40.000000000 +0800
@@ -185,11 +185,68 @@ static int wb_priority(struct writeback_
* NFS congestion control
*/
+#define NFS_WAIT_PAGES (1024L >> (PAGE_SHIFT - 10))
int nfs_congestion_kb;
-#define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10))
-#define NFS_CONGESTION_OFF_THRESH \
- (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
+/*
+ * SYNC requests will block on (2*limit) and wakeup on (2*limit-NFS_WAIT_PAGES)
+ * ASYNC requests will block on (limit) and wakeup on (limit - NFS_WAIT_PAGES)
+ * In this way SYNC writes will never be blocked by ASYNC ones.
+ */
+
+static void nfs_set_congested(long nr, struct backing_dev_info *bdi)
+{
+ long limit = nfs_congestion_kb >> (PAGE_SHIFT - 10);
+
+ if (nr > limit && !test_bit(BDI_async_congested, &bdi->state))
+ set_bdi_congested(bdi, BLK_RW_ASYNC);
+ else if (nr > 2 * limit && !test_bit(BDI_sync_congested, &bdi->state))
+ set_bdi_congested(bdi, BLK_RW_SYNC);
+}
+
+static void nfs_wait_contested(int is_sync,
+ struct backing_dev_info *bdi,
+ wait_queue_head_t *wqh)
+{
+ int waitbit = is_sync ? BDI_sync_congested : BDI_async_congested;
+ DEFINE_WAIT(wait);
+
+ if (!test_bit(waitbit, &bdi->state))
+ return;
+
+ for (;;) {
+ prepare_to_wait(&wqh[is_sync], &wait, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(waitbit, &bdi->state))
+ break;
+
+ io_schedule();
+ }
+ finish_wait(&wqh[is_sync], &wait);
+}
+
+static void nfs_wakeup_congested(long nr,
+ struct backing_dev_info *bdi,
+ wait_queue_head_t *wqh)
+{
+ long limit = nfs_congestion_kb >> (PAGE_SHIFT - 10);
+
+ if (nr < 2 * limit - min(limit / 8, NFS_WAIT_PAGES)) {
+ if (test_bit(BDI_sync_congested, &bdi->state)) {
+ clear_bdi_congested(bdi, BLK_RW_SYNC);
+ smp_mb__after_clear_bit();
+ }
+ if (waitqueue_active(&wqh[BLK_RW_SYNC]))
+ wake_up(&wqh[BLK_RW_SYNC]);
+ }
+ if (nr < limit - min(limit / 8, NFS_WAIT_PAGES)) {
+ if (test_bit(BDI_async_congested, &bdi->state)) {
+ clear_bdi_congested(bdi, BLK_RW_ASYNC);
+ smp_mb__after_clear_bit();
+ }
+ if (waitqueue_active(&wqh[BLK_RW_ASYNC]))
+ wake_up(&wqh[BLK_RW_ASYNC]);
+ }
+}
static int nfs_set_page_writeback(struct page *page)
{
@@ -200,11 +257,8 @@ static int nfs_set_page_writeback(struct
struct nfs_server *nfss = NFS_SERVER(inode);
page_cache_get(page);
- if (atomic_long_inc_return(&nfss->writeback) >
- NFS_CONGESTION_ON_THRESH) {
- set_bdi_congested(&nfss->backing_dev_info,
- BLK_RW_ASYNC);
- }
+ nfs_set_congested(atomic_long_inc_return(&nfss->writeback),
+ &nfss->backing_dev_info);
}
return ret;
}
@@ -216,8 +270,10 @@ static void nfs_end_page_writeback(struc
end_page_writeback(page);
page_cache_release(page);
- if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
- clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
+
+ nfs_wakeup_congested(atomic_long_dec_return(&nfss->writeback),
+ &nfss->backing_dev_info,
+ nfss->writeback_wait);
}
static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
@@ -318,19 +374,34 @@ static int nfs_writepage_locked(struct p
int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
+ struct inode *inode = page->mapping->host;
+ struct nfs_server *nfss = NFS_SERVER(inode);
int ret;
ret = nfs_writepage_locked(page, wbc);
unlock_page(page);
+
+ nfs_wait_contested(wbc->sync_mode == WB_SYNC_ALL,
+ &nfss->backing_dev_info,
+ nfss->writeback_wait);
+
return ret;
}
-static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
+static int nfs_writepages_callback(struct page *page,
+ struct writeback_control *wbc, void *data)
{
+ struct inode *inode = page->mapping->host;
+ struct nfs_server *nfss = NFS_SERVER(inode);
int ret;
ret = nfs_do_writepage(page, wbc, data);
unlock_page(page);
+
+ nfs_wait_contested(wbc->sync_mode == WB_SYNC_ALL,
+ &nfss->backing_dev_info,
+ nfss->writeback_wait);
+
return ret;
}
--- linux-next.orig/include/linux/nfs_fs_sb.h 2011-03-03 14:03:38.000000000 +0800
+++ linux-next/include/linux/nfs_fs_sb.h 2011-03-03 14:03:40.000000000 +0800
@@ -102,6 +102,7 @@ struct nfs_server {
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
struct backing_dev_info backing_dev_info;
atomic_long_t writeback; /* number of writeback pages */
+ wait_queue_head_t writeback_wait[2];
int flags; /* various flags */
unsigned int caps; /* server capabilities */
unsigned int rsize; /* read size */
--- linux-next.orig/fs/nfs/client.c 2011-03-03 14:03:38.000000000 +0800
+++ linux-next/fs/nfs/client.c 2011-03-03 14:03:40.000000000 +0800
@@ -1042,6 +1042,8 @@ static struct nfs_server *nfs_alloc_serv
INIT_LIST_HEAD(&server->delegations);
atomic_set(&server->active, 0);
+ init_waitqueue_head(&server->writeback_wait[BLK_RW_SYNC]);
+ init_waitqueue_head(&server->writeback_wait[BLK_RW_ASYNC]);
server->io_stats = nfs_alloc_iostats();
if (!server->io_stats) {
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
WARNING: multiple messages have this Message-ID (diff)
From: Wu Fengguang <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>, Jens Axboe <axboe@kernel.dk>,
Chris Mason <chris.mason@oracle.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Trond Myklebust <Trond.Myklebust@netapp.com>,
Wu Fengguang <fengguang.wu@intel.com>,
Christoph Hellwig <hch@lst.de>,
Dave Chinner <david@fromorbit.com>, Theodore Ts'o <tytso@mit.edu>,
Mel Gorman <mel@csn.ul.ie>, Rik van Riel <riel@redhat.com>,
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
Greg Thelen <gthelen@google.com>,
Minchan Kim <minchan.kim@gmail.com>,
Vivek Goyal <vgoyal@redhat.com>,
Andrea Righi <arighi@develer.com>,
Balbir Singh <balbir@linux.vnet.ibm.com>,
linux-mm <linux-mm@kvack.org>,
linux-fsdevel@vger.kernel.org,
LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 09/27] nfs: writeback pages wait queue
Date: Thu, 03 Mar 2011 14:45:14 +0800 [thread overview]
Message-ID: <20110303074949.809203319@intel.com> (raw)
In-Reply-To: 20110303064505.718671603@intel.com
[-- Attachment #1: writeback-nfs-request-queue.patch --]
[-- Type: text/plain, Size: 6157 bytes --]
The generic writeback routines are departing from congestion_wait()
in preference of get_request_wait(), aka. waiting on the block queues.
Introduce the missing writeback wait queue for NFS, otherwise its
writeback pages will grow out of control, exhausting all PG_dirty pages.
CC: Jens Axboe <axboe@kernel.dk>
CC: Chris Mason <chris.mason@oracle.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
fs/nfs/client.c | 2
fs/nfs/write.c | 93 +++++++++++++++++++++++++++++++-----
include/linux/nfs_fs_sb.h | 1
3 files changed, 85 insertions(+), 11 deletions(-)
--- linux-next.orig/fs/nfs/write.c 2011-03-03 14:03:40.000000000 +0800
+++ linux-next/fs/nfs/write.c 2011-03-03 14:03:40.000000000 +0800
@@ -185,11 +185,68 @@ static int wb_priority(struct writeback_
* NFS congestion control
*/
+#define NFS_WAIT_PAGES (1024L >> (PAGE_SHIFT - 10))
int nfs_congestion_kb;
-#define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10))
-#define NFS_CONGESTION_OFF_THRESH \
- (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
+/*
+ * SYNC requests will block on (2*limit) and wakeup on (2*limit-NFS_WAIT_PAGES)
+ * ASYNC requests will block on (limit) and wakeup on (limit - NFS_WAIT_PAGES)
+ * In this way SYNC writes will never be blocked by ASYNC ones.
+ */
+
+static void nfs_set_congested(long nr, struct backing_dev_info *bdi)
+{
+ long limit = nfs_congestion_kb >> (PAGE_SHIFT - 10);
+
+ if (nr > limit && !test_bit(BDI_async_congested, &bdi->state))
+ set_bdi_congested(bdi, BLK_RW_ASYNC);
+ else if (nr > 2 * limit && !test_bit(BDI_sync_congested, &bdi->state))
+ set_bdi_congested(bdi, BLK_RW_SYNC);
+}
+
+static void nfs_wait_contested(int is_sync,
+ struct backing_dev_info *bdi,
+ wait_queue_head_t *wqh)
+{
+ int waitbit = is_sync ? BDI_sync_congested : BDI_async_congested;
+ DEFINE_WAIT(wait);
+
+ if (!test_bit(waitbit, &bdi->state))
+ return;
+
+ for (;;) {
+ prepare_to_wait(&wqh[is_sync], &wait, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(waitbit, &bdi->state))
+ break;
+
+ io_schedule();
+ }
+ finish_wait(&wqh[is_sync], &wait);
+}
+
+static void nfs_wakeup_congested(long nr,
+ struct backing_dev_info *bdi,
+ wait_queue_head_t *wqh)
+{
+ long limit = nfs_congestion_kb >> (PAGE_SHIFT - 10);
+
+ if (nr < 2 * limit - min(limit / 8, NFS_WAIT_PAGES)) {
+ if (test_bit(BDI_sync_congested, &bdi->state)) {
+ clear_bdi_congested(bdi, BLK_RW_SYNC);
+ smp_mb__after_clear_bit();
+ }
+ if (waitqueue_active(&wqh[BLK_RW_SYNC]))
+ wake_up(&wqh[BLK_RW_SYNC]);
+ }
+ if (nr < limit - min(limit / 8, NFS_WAIT_PAGES)) {
+ if (test_bit(BDI_async_congested, &bdi->state)) {
+ clear_bdi_congested(bdi, BLK_RW_ASYNC);
+ smp_mb__after_clear_bit();
+ }
+ if (waitqueue_active(&wqh[BLK_RW_ASYNC]))
+ wake_up(&wqh[BLK_RW_ASYNC]);
+ }
+}
static int nfs_set_page_writeback(struct page *page)
{
@@ -200,11 +257,8 @@ static int nfs_set_page_writeback(struct
struct nfs_server *nfss = NFS_SERVER(inode);
page_cache_get(page);
- if (atomic_long_inc_return(&nfss->writeback) >
- NFS_CONGESTION_ON_THRESH) {
- set_bdi_congested(&nfss->backing_dev_info,
- BLK_RW_ASYNC);
- }
+ nfs_set_congested(atomic_long_inc_return(&nfss->writeback),
+ &nfss->backing_dev_info);
}
return ret;
}
@@ -216,8 +270,10 @@ static void nfs_end_page_writeback(struc
end_page_writeback(page);
page_cache_release(page);
- if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
- clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
+
+ nfs_wakeup_congested(atomic_long_dec_return(&nfss->writeback),
+ &nfss->backing_dev_info,
+ nfss->writeback_wait);
}
static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
@@ -318,19 +374,34 @@ static int nfs_writepage_locked(struct p
int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
+ struct inode *inode = page->mapping->host;
+ struct nfs_server *nfss = NFS_SERVER(inode);
int ret;
ret = nfs_writepage_locked(page, wbc);
unlock_page(page);
+
+ nfs_wait_contested(wbc->sync_mode == WB_SYNC_ALL,
+ &nfss->backing_dev_info,
+ nfss->writeback_wait);
+
return ret;
}
-static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
+static int nfs_writepages_callback(struct page *page,
+ struct writeback_control *wbc, void *data)
{
+ struct inode *inode = page->mapping->host;
+ struct nfs_server *nfss = NFS_SERVER(inode);
int ret;
ret = nfs_do_writepage(page, wbc, data);
unlock_page(page);
+
+ nfs_wait_contested(wbc->sync_mode == WB_SYNC_ALL,
+ &nfss->backing_dev_info,
+ nfss->writeback_wait);
+
return ret;
}
--- linux-next.orig/include/linux/nfs_fs_sb.h 2011-03-03 14:03:38.000000000 +0800
+++ linux-next/include/linux/nfs_fs_sb.h 2011-03-03 14:03:40.000000000 +0800
@@ -102,6 +102,7 @@ struct nfs_server {
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
struct backing_dev_info backing_dev_info;
atomic_long_t writeback; /* number of writeback pages */
+ wait_queue_head_t writeback_wait[2];
int flags; /* various flags */
unsigned int caps; /* server capabilities */
unsigned int rsize; /* read size */
--- linux-next.orig/fs/nfs/client.c 2011-03-03 14:03:38.000000000 +0800
+++ linux-next/fs/nfs/client.c 2011-03-03 14:03:40.000000000 +0800
@@ -1042,6 +1042,8 @@ static struct nfs_server *nfs_alloc_serv
INIT_LIST_HEAD(&server->delegations);
atomic_set(&server->active, 0);
+ init_waitqueue_head(&server->writeback_wait[BLK_RW_SYNC]);
+ init_waitqueue_head(&server->writeback_wait[BLK_RW_ASYNC]);
server->io_stats = nfs_alloc_iostats();
if (!server->io_stats) {
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2011-03-03 8:22 UTC|newest]
Thread overview: 113+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-03-03 6:45 [PATCH 00/27] IO-less dirty throttling v6 Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 01/27] writeback: add bdi_dirty_limit() kernel-doc Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 02/27] writeback: avoid duplicate balance_dirty_pages_ratelimited() calls Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 03/27] writeback: skip balance_dirty_pages() for in-memory fs Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 04/27] writeback: reduce per-bdi dirty threshold ramp up time Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 05/27] btrfs: avoid duplicate balance_dirty_pages_ratelimited() calls Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 06/27] btrfs: lower the dirty balance poll interval Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-04 6:22 ` Dave Chinner
2011-03-04 6:22 ` Dave Chinner
2011-03-04 7:57 ` Wu Fengguang
2011-03-04 7:57 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 07/27] btrfs: wait on too many nr_async_bios Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 08/27] nfs: dirty livelock prevention is now done in VFS Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang [this message]
2011-03-03 6:45 ` [PATCH 09/27] nfs: writeback pages wait queue Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 16:07 ` Peter Zijlstra
2011-03-03 16:07 ` Peter Zijlstra
2011-03-04 1:53 ` Wu Fengguang
2011-03-04 1:53 ` Wu Fengguang
2011-03-03 16:08 ` Peter Zijlstra
2011-03-03 16:08 ` Peter Zijlstra
2011-03-04 2:01 ` Wu Fengguang
2011-03-04 2:01 ` Wu Fengguang
2011-03-04 9:10 ` Peter Zijlstra
2011-03-04 9:10 ` Peter Zijlstra
2011-03-04 9:26 ` Peter Zijlstra
2011-03-04 9:26 ` Peter Zijlstra
2011-03-04 14:38 ` Wu Fengguang
2011-03-04 14:38 ` Wu Fengguang
2011-03-04 14:41 ` Peter Zijlstra
2011-03-04 14:41 ` Peter Zijlstra
2011-03-03 6:45 ` [PATCH 10/27] nfs: limit the commit size to reduce fluctuations Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 11/27] nfs: limit the commit range Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 12/27] nfs: lower writeback threshold proportionally to dirty threshold Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 13/27] writeback: account per-bdi accumulated written pages Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 14/27] writeback: account per-bdi accumulated dirtied pages Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 15/27] writeback: bdi write bandwidth estimation Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 16/27] writeback: smoothed global/bdi dirty pages Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 17/27] writeback: smoothed dirty threshold and limit Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 18/27] writeback: enforce 1/4 gap between the dirty/background thresholds Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 19/27] writeback: dirty throttle bandwidth control Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-07 21:34 ` Wu Fengguang
2011-03-07 21:34 ` Wu Fengguang
2011-03-29 21:08 ` Wu Fengguang
2011-03-29 21:08 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 20/27] writeback: IO-less balance_dirty_pages() Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 21/27] writeback: show bdi write bandwidth in debugfs Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 22/27] writeback: trace dirty_throttle_bandwidth Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 23/27] writeback: trace balance_dirty_pages Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 24/27] writeback: trace global_dirty_state Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 25/27] writeback: make nr_to_write a per-file limit Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 26/27] writeback: scale IO chunk size up to device bandwidth Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` [PATCH 27/27] writeback: trace writeback_single_inode Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 6:45 ` Wu Fengguang
2011-03-03 20:12 ` [PATCH 00/27] IO-less dirty throttling v6 Vivek Goyal
2011-03-03 20:12 ` Vivek Goyal
2011-03-03 20:48 ` Vivek Goyal
2011-03-03 20:48 ` Vivek Goyal
2011-03-04 9:06 ` Wu Fengguang
2011-03-04 9:06 ` Wu Fengguang
2011-04-04 18:12 ` async write IO controllers Wu Fengguang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110303074949.809203319@intel.com \
--to=fengguang.wu@intel.com \
--cc=Trond.Myklebust@netapp.com \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=axboe@kernel.dk \
--cc=chris.mason@oracle.com \
--cc=jack@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.