* [PATCH 1/2] fs/buffer.c: allocate buffer cache from non-movable area
@ 2014-07-22 5:20 Gioh Kim
0 siblings, 0 replies; 5+ messages in thread
From: Gioh Kim @ 2014-07-22 5:20 UTC (permalink / raw)
To: Alexander Viro, Andrew Morton, paulmck, peterz, Jan Kara,
linux-fsdevel, linux-kernel, Theodore Ts'o, Andreas Dilger,
linux-ext4
A buffer cache is allocated from the movable area
because it is usually referenced only for a short while and released soon.
But some filesystems are taking buffer cache for a long time
and it can disturb page migration.
A new API should be introduced to allocate buffer cache from
non-movable area.
Signed-off-by: Gioh Kim <gioh.kim@lge.com>
---
fs/buffer.c | 39 ++++++++++++++++++++++++++++++++-------
include/linux/buffer_head.h | 8 ++++++++
2 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 8f05111..8c7ed02 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
*/
static int
grow_dev_page(struct block_device *bdev, sector_t block,
- pgoff_t index, int size, int sizebits)
+ pgoff_t index, int size, int sizebits, gfp_t movable_mask)
{
struct inode *inode = bdev->bd_inode;
struct page *page;
@@ -1003,7 +1003,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
gfp_t gfp_mask;
gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
- gfp_mask |= __GFP_MOVABLE;
+ if (movable_mask & __GFP_MOVABLE)
+ gfp_mask |= __GFP_MOVABLE;
/*
* XXX: __getblk_slow() can not really deal with failure and
* will endlessly loop on improvised global reclaim. Prefer
@@ -1058,7 +1059,8 @@ failed:
* that page was dirty, the buffers are set dirty also.
*/
static int
-grow_buffers(struct block_device *bdev, sector_t block, int size)
+grow_buffers(struct block_device *bdev, sector_t block,
+ int size, gfp_t movable_mask)
{
pgoff_t index;
int sizebits;
@@ -1085,11 +1087,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
}
/* Create a page with the proper size buffers.. */
- return grow_dev_page(bdev, block, index, size, sizebits);
+ return grow_dev_page(bdev, block, index, size, sizebits, movable_mask);
}
static struct buffer_head *
-__getblk_slow(struct block_device *bdev, sector_t block, int size)
+__getblk_slow(struct block_device *bdev, sector_t block,
+ int size, gfp_t movable_mask)
{
/* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
@@ -1111,7 +1114,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
if (bh)
return bh;
- ret = grow_buffers(bdev, block, size);
+ ret = grow_buffers(bdev, block, size, movable_mask);
if (ret < 0)
return NULL;
if (ret == 0)
@@ -1385,7 +1388,7 @@ __getblk(struct block_device *bdev, sector_t block, unsigned size)
might_sleep();
if (bh == NULL)
- bh = __getblk_slow(bdev, block, size);
+ bh = __getblk_slow(bdev, block, size, __GFP_MOVABLE);
return bh;
}
EXPORT_SYMBOL(__getblk);
@@ -1410,6 +1413,7 @@ EXPORT_SYMBOL(__breadahead);
* @size: size (in bytes) to read
*
* Reads a specified block, and returns buffer head that contains it.
+ * The page cache is allocated from movable area.
* It returns NULL if the block was unreadable.
*/
struct buffer_head *
@@ -1423,6 +1427,27 @@ __bread(struct block_device *bdev, sector_t block, unsigned size)
}
EXPORT_SYMBOL(__bread);
+/**
+ * __bread_nonmovable() - reads a specified block and returns the bh
+ * @bdev: the block_device to read from
+ * @block: number of block
+ * @size: size (in bytes) to read
+ *
+ * Reads a specified block, and returns buffer head that contains it.
+ * The page cache is allocated from non-movable area.
+ * It returns NULL if the block was unreadable.
+ */
+struct buffer_head *
+__bread_nonmovable(struct block_device *bdev, sector_t block, unsigned size)
+{
+ struct buffer_head *bh = __getblk_slow(bdev, block, size, 0);
+
+ if (likely(bh) && !buffer_uptodate(bh))
+ bh = __bread_slow(bh);
+ return bh;
+}
+EXPORT_SYMBOL(__bread_nonmovable);
+
/*
* invalidate_bh_lrus() is called rarely - but not only at unmount.
* This doesn't race because it runs in each cpu either in irq
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 324329c..cf8def2 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -181,6 +181,8 @@ void __brelse(struct buffer_head *);
void __bforget(struct buffer_head *);
void __breadahead(struct block_device *, sector_t block, unsigned int size);
struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size);
+struct buffer_head *__bread_nonmovable(struct block_device *,
+ sector_t block, unsigned size);
void invalidate_bh_lrus(void);
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
@@ -298,6 +300,12 @@ sb_bread(struct super_block *sb, sector_t block)
return __bread(sb->s_bdev, block, sb->s_blocksize);
}
+static inline struct buffer_head *
+sb_bread_nonmovable(struct super_block *sb, sector_t block)
+{
+ return __bread_nonmovable(sb->s_bdev, block, sb->s_blocksize);
+}
+
static inline void
sb_breadahead(struct super_block *sb, sector_t block)
{
--
1.7.9.5
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 0/2] new APIs to allocate buffer-cache for superblock in non-movable area
@ 2014-08-14 5:12 Gioh Kim
2014-08-14 5:15 ` [PATCH 1/2] fs/buffer.c: allocate buffer cache from " Gioh Kim
0 siblings, 1 reply; 5+ messages in thread
From: Gioh Kim @ 2014-08-14 5:12 UTC (permalink / raw)
To: Alexander Viro, Andrew Morton, Paul E. McKenney, Peter Zijlstra,
Jan Kara, linux-fsdevel, linux-kernel, Theodore Ts'o,
Andreas Dilger, linux-ext4
Cc: Minchan Kim, Joonsoo Kim, 이건호
Hello,
This patchset tries to solve the problem that the long-lasting page caches of
the ext4 superblock and of the superblock's journaling data disturb page migration.
I've been testing CMA feature on my ARM-based platform
and found that two page caches cannot be migrated.
They are page caches of superblock of ext4 filesystem and its journaling data.
Currently ext4 reads the superblock with sb_bread(), which allocates the page
from the movable area. But the problem is that ext4 holds the page until
it is unmounted. If the root filesystem is ext4, the page can never be migrated.
The journaling data for the superblock cannot be migrated either.
I introduce a new API for allocating page cache from non-movable area.
It is useful for ext4/ext3 and others that want to hold page cache for a long time.
I have 3 patches:
1. Patch 1/3: introduce a new API that creates page cache from non-movable area
2. Patch 2/3: have ext4 use the new API to read superblock
3. Patch 3/3: have jbd/jbd2 use the new API to make journaling of superblock
This patchset is based on linux-next-20140814.
Thanks a lot.
Gioh Kim (3):
fs/buffer.c: allocate buffer cache from non-movable area
ext4: allocate buffer-cache for superblock in non-movable area
jbd-jbd2-allocate-buffer-cache-for-superblock-inode-.patch
fs/buffer.c | 63 ++++++++++++++++++++++++++++++----
fs/ext4/super.c | 6 +--
fs/jbd/journal.c | 2 -
fs/jbd2/journal.c | 2 -
include/linux/buffer_head.h | 10 +++++
5 files changed, 71 insertions(+), 12 deletions(-)
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] fs/buffer.c: allocate buffer cache from non-movable area
2014-08-14 5:12 [PATCH 0/2] new APIs to allocate buffer-cache for superblock in " Gioh Kim
@ 2014-08-14 5:15 ` Gioh Kim
2014-08-14 5:19 ` Gioh Kim
2014-08-14 21:22 ` Andrew Morton
0 siblings, 2 replies; 5+ messages in thread
From: Gioh Kim @ 2014-08-14 5:15 UTC (permalink / raw)
To: Alexander Viro, Andrew Morton, Paul E. McKenney, Peter Zijlstra,
Jan Kara, linux-fsdevel, linux-kernel, Theodore Ts'o,
Andreas Dilger, linux-ext4
Cc: Minchan Kim, Joonsoo Kim, 이건호
A buffer cache is allocated from the movable area
because it is usually referenced only for a short while and released soon.
But some filesystems are taking buffer cache for a long time
and it can disturb page migration.
A new API should be introduced to allocate buffer cache from
non-movable area.
Signed-off-by: Gioh Kim <gioh.kim@lge.com>
---
fs/buffer.c | 63 ++++++++++++++++++++++++++++++++++++++-----
include/linux/buffer_head.h | 10 +++++++
2 files changed, 66 insertions(+), 7 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 8f05111..7ef658f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
*/
static int
grow_dev_page(struct block_device *bdev, sector_t block,
- pgoff_t index, int size, int sizebits)
+ pgoff_t index, int size, int sizebits, gfp_t movable_mask)
{
struct inode *inode = bdev->bd_inode;
struct page *page;
@@ -1003,7 +1003,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
gfp_t gfp_mask;
gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
- gfp_mask |= __GFP_MOVABLE;
+ if (movable_mask & __GFP_MOVABLE)
+ gfp_mask |= __GFP_MOVABLE;
/*
* XXX: __getblk_slow() can not really deal with failure and
* will endlessly loop on improvised global reclaim. Prefer
@@ -1058,7 +1059,8 @@ failed:
* that page was dirty, the buffers are set dirty also.
*/
static int
-grow_buffers(struct block_device *bdev, sector_t block, int size)
+grow_buffers(struct block_device *bdev, sector_t block,
+ int size, gfp_t movable_mask)
{
pgoff_t index;
int sizebits;
@@ -1085,11 +1087,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
}
/* Create a page with the proper size buffers.. */
- return grow_dev_page(bdev, block, index, size, sizebits);
+ return grow_dev_page(bdev, block, index, size, sizebits, movable_mask);
}
static struct buffer_head *
-__getblk_slow(struct block_device *bdev, sector_t block, int size)
+__getblk_slow(struct block_device *bdev, sector_t block,
+ int size, gfp_t movable_mask)
{
/* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
@@ -1111,7 +1114,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
if (bh)
return bh;
- ret = grow_buffers(bdev, block, size);
+ ret = grow_buffers(bdev, block, size, movable_mask);
if (ret < 0)
return NULL;
if (ret == 0)
@@ -1385,11 +1388,34 @@ __getblk(struct block_device *bdev, sector_t block, unsigned size)
might_sleep();
if (bh == NULL)
- bh = __getblk_slow(bdev, block, size);
+ bh = __getblk_slow(bdev, block, size, __GFP_MOVABLE);
return bh;
}
EXPORT_SYMBOL(__getblk);
+ /*
+ * __getblk_nonmovable will locate (and, if necessary, create) the buffer_head
+ * which corresponds to the passed block_device, block and size. The
+ * returned buffer has its reference count incremented.
+ *
+ * The page cache is allocated from non-movable area
+ * not to prevent page migration.
+ *
+ * __getblk()_nonmovable will lock up the machine
+ * if grow_dev_page's try_to_free_buffers() attempt is failing. FIXME, perhaps?
+ */
+struct buffer_head *
+__getblk_nonmovable(struct block_device *bdev, sector_t block, unsigned size)
+{
+ struct buffer_head *bh = __find_get_block(bdev, block, size);
+
+ might_sleep();
+ if (bh == NULL)
+ bh = __getblk_slow(bdev, block, size, 0);
+ return bh;
+}
+EXPORT_SYMBOL(__getblk_nonmovable);
+
/*
* Do async read-ahead on a buffer..
*/
@@ -1410,6 +1436,7 @@ EXPORT_SYMBOL(__breadahead);
* @size: size (in bytes) to read
*
* Reads a specified block, and returns buffer head that contains it.
+ * The page cache is allocated from movable area so that it can be migrated.
* It returns NULL if the block was unreadable.
*/
struct buffer_head *
@@ -1423,6 +1450,28 @@ __bread(struct block_device *bdev, sector_t block, unsigned size)
}
EXPORT_SYMBOL(__bread);
+/**
+ * __bread_nonmovable() - reads a specified block and returns the bh
+ * @bdev: the block_device to read from
+ * @block: number of block
+ * @size: size (in bytes) to read
+ *
+ * Reads a specified block, and returns buffer head that contains it.
+ * The page cache is allocated from non-movable area
+ * not to prevent page migration.
+ * It returns NULL if the block was unreadable.
+ */
+struct buffer_head *
+__bread_nonmovable(struct block_device *bdev, sector_t block, unsigned size)
+{
+ struct buffer_head *bh = __getblk_slow(bdev, block, size, 0);
+
+ if (likely(bh) && !buffer_uptodate(bh))
+ bh = __bread_slow(bh);
+ return bh;
+}
+EXPORT_SYMBOL(__bread_nonmovable);
+
/*
* invalidate_bh_lrus() is called rarely - but not only at unmount.
* This doesn't race because it runs in each cpu either in irq
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 324329c..3f52370 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -177,10 +177,14 @@ struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
unsigned size);
struct buffer_head *__getblk(struct block_device *bdev, sector_t block,
unsigned size);
+struct buffer_head *__getblk_nonmovable(struct block_device *bdev,
+ sector_t block, unsigned size);
void __brelse(struct buffer_head *);
void __bforget(struct buffer_head *);
void __breadahead(struct block_device *, sector_t block, unsigned int size);
struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size);
+struct buffer_head *__bread_nonmovable(struct block_device *,
+ sector_t block, unsigned size);
void invalidate_bh_lrus(void);
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
@@ -298,6 +302,12 @@ sb_bread(struct super_block *sb, sector_t block)
return __bread(sb->s_bdev, block, sb->s_blocksize);
}
+static inline struct buffer_head *
+sb_bread_nonmovable(struct super_block *sb, sector_t block)
+{
+ return __bread_nonmovable(sb->s_bdev, block, sb->s_blocksize);
+}
+
static inline void
sb_breadahead(struct super_block *sb, sector_t block)
{
--
1.7.9.5
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] fs/buffer.c: allocate buffer cache from non-movable area
2014-08-14 5:15 ` [PATCH 1/2] fs/buffer.c: allocate buffer cache from " Gioh Kim
@ 2014-08-14 5:19 ` Gioh Kim
2014-08-14 21:22 ` Andrew Morton
1 sibling, 0 replies; 5+ messages in thread
From: Gioh Kim @ 2014-08-14 5:19 UTC (permalink / raw)
To: Alexander Viro, Andrew Morton, Paul E. McKenney, Peter Zijlstra,
Jan Kara, linux-fsdevel, linux-kernel, Theodore Ts'o,
Andreas Dilger, linux-ext4
Cc: Minchan Kim, Joonsoo Kim, 이건호
I'm sorry for the typo in the title.
It is 1/3, not 1/2.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] fs/buffer.c: allocate buffer cache from non-movable area
2014-08-14 5:15 ` [PATCH 1/2] fs/buffer.c: allocate buffer cache from " Gioh Kim
2014-08-14 5:19 ` Gioh Kim
@ 2014-08-14 21:22 ` Andrew Morton
2014-08-18 1:19 ` Gioh Kim
1 sibling, 1 reply; 5+ messages in thread
From: Andrew Morton @ 2014-08-14 21:22 UTC (permalink / raw)
To: Gioh Kim
Cc: Alexander Viro, Paul E. McKenney, Peter Zijlstra, Jan Kara,
linux-fsdevel, linux-kernel, Theodore Ts'o, Andreas Dilger,
linux-ext4, Minchan Kim, Joonsoo Kim, 이건호
On Thu, 14 Aug 2014 14:15:40 +0900 Gioh Kim <gioh.kim@lge.com> wrote:
> A buffer cache is allocated from movable area
> because it is referred for a while and released soon.
> But some filesystems are taking buffer cache for a long time
> and it can disturb page migration.
>
> A new API should be introduced to allocate buffer cache from
> non-movable area.
I think the API could and should be more flexible than this.
Rather than making the API be "movable or not movable", let's permit
callers to specify the gfp_t and leave it at that. That way, if
someone later wants to allocate a buffer head with, I dunno,
__GFP_NOTRACK then they can do so.
So the word "movable" shouldn't appear in buffer.c at all, except in a
single place.
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
> */
> static int
> grow_dev_page(struct block_device *bdev, sector_t block,
> - pgoff_t index, int size, int sizebits)
> + pgoff_t index, int size, int sizebits, gfp_t movable_mask)
s/movable_mask/gfp/
> {
> struct inode *inode = bdev->bd_inode;
> struct page *page;
> @@ -1003,7 +1003,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
> gfp_t gfp_mask;
>
> gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
> - gfp_mask |= __GFP_MOVABLE;
> + if (movable_mask & __GFP_MOVABLE)
> + gfp_mask |= __GFP_MOVABLE;
This becomes
gfp_mask |= gfp;
> /*
> * XXX: __getblk_slow() can not really deal with failure and
> * will endlessly loop on improvised global reclaim. Prefer
> @@ -1058,7 +1059,8 @@ failed:
> * that page was dirty, the buffers are set dirty also.
> */
> static int
> -grow_buffers(struct block_device *bdev, sector_t block, int size)
> +grow_buffers(struct block_device *bdev, sector_t block,
> + int size, gfp_t movable_mask)
gfp
> {
> pgoff_t index;
> int sizebits;
> @@ -1085,11 +1087,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
> }
>
> /* Create a page with the proper size buffers.. */
> - return grow_dev_page(bdev, block, index, size, sizebits);
> + return grow_dev_page(bdev, block, index, size, sizebits, movable_mask);
> }
>
> static struct buffer_head *
> -__getblk_slow(struct block_device *bdev, sector_t block, int size)
> +__getblk_slow(struct block_device *bdev, sector_t block,
> + int size, gfp_t movable_mask)
gfp
> {
> /* Size must be multiple of hard sectorsize */
> if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
> @@ -1111,7 +1114,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
> if (bh)
> return bh;
>
> - ret = grow_buffers(bdev, block, size);
> + ret = grow_buffers(bdev, block, size, movable_mask);
gfp
> if (ret < 0)
> return NULL;
> if (ret == 0)
> @@ -1385,11 +1388,34 @@ __getblk(struct block_device *bdev, sector_t block, unsigned size)
>
> might_sleep();
> if (bh == NULL)
> - bh = __getblk_slow(bdev, block, size);
> + bh = __getblk_slow(bdev, block, size, __GFP_MOVABLE);
Here is the one place where buffer.c mentions "movable".
> return bh;
> }
> EXPORT_SYMBOL(__getblk);
>
> + /*
> + * __getblk_nonmovable will locate (and, if necessary, create) the buffer_head
> + * which corresponds to the passed block_device, block and size. The
> + * returned buffer has its reference count incremented.
> + *
> + * The page cache is allocated from non-movable area
> + * not to prevent page migration.
> + *
> + * __getblk()_nonmovable will lock up the machine
> + * if grow_dev_page's try_to_free_buffers() attempt is failing. FIXME, perhaps?
> + */
> +struct buffer_head *
> +__getblk_nonmovable(struct block_device *bdev, sector_t block, unsigned size)
> +{
> + struct buffer_head *bh = __find_get_block(bdev, block, size);
> +
> + might_sleep();
> + if (bh == NULL)
> + bh = __getblk_slow(bdev, block, size, 0);
> + return bh;
> +}
> +EXPORT_SYMBOL(__getblk_nonmovable);
Suggest this be called __getblk_gfp(bdev, block, size, gfp) and then
__getblk() be changed to call __getblk_gfp(..., __GFP_MOVABLE).
We could then write a __getblk_nonmovable() which calls __getblk_gfp()
(a static inlined one-line function) or we can just call
__getblk_gfp(..., 0) directly from filesystems.
> @@ -1423,6 +1450,28 @@ __bread(struct block_device *bdev, sector_t block, unsigned size)
> }
> EXPORT_SYMBOL(__bread);
>
> +/**
> + * __bread_nonmovable() - reads a specified block and returns the bh
> + * @bdev: the block_device to read from
> + * @block: number of block
> + * @size: size (in bytes) to read
> + *
> + * Reads a specified block, and returns buffer head that contains it.
> + * The page cache is allocated from non-movable area
> + * not to prevent page migration.
> + * It returns NULL if the block was unreadable.
> + */
> +struct buffer_head *
> +__bread_nonmovable(struct block_device *bdev, sector_t block, unsigned size)
> +{
> + struct buffer_head *bh = __getblk_slow(bdev, block, size, 0);
> +
> + if (likely(bh) && !buffer_uptodate(bh))
> + bh = __bread_slow(bh);
> + return bh;
> +}
> +EXPORT_SYMBOL(__bread_nonmovable);
Treat this in the same fashion as __getblk_nonmovable().
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] fs/buffer.c: allocate buffer cache from non-movable area
2014-08-14 21:22 ` Andrew Morton
@ 2014-08-18 1:19 ` Gioh Kim
0 siblings, 0 replies; 5+ messages in thread
From: Gioh Kim @ 2014-08-18 1:19 UTC (permalink / raw)
To: Andrew Morton
Cc: Alexander Viro, Paul E. McKenney, Peter Zijlstra, Jan Kara,
linux-fsdevel, linux-kernel, Theodore Ts'o, Andreas Dilger,
linux-ext4, Minchan Kim, Joonsoo Kim, 이건호
2014-08-15 오전 6:22, Andrew Morton 쓴 글:
> On Thu, 14 Aug 2014 14:15:40 +0900 Gioh Kim <gioh.kim@lge.com> wrote:
>
>> A buffer cache is allocated from movable area
>> because it is referred for a while and released soon.
>> But some filesystems are taking buffer cache for a long time
>> and it can disturb page migration.
>>
>> A new API should be introduced to allocate buffer cache from
>> non-movable area.
>
> I think the API could and should be more flexible than this.
>
> Rather than making the API be "movable or not movable", let's permit
> callers to specify the gfp_t and leave it at that. That way, if
> someone later wants to allocate a buffer head with, I dunno,
> __GFP_NOTRACK then they can do so.
>
> So the word "movable" shouldn't appear in buffer.c at all, except in a
> single place.
Absolutely I agree with you.
If the filesystem developers agree with this patch, I will send a 2nd version that applies your ideas.
Thank you for your advice.
>
>> --- a/fs/buffer.c
>> +++ b/fs/buffer.c
>> @@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
>> */
>> static int
>> grow_dev_page(struct block_device *bdev, sector_t block,
>> - pgoff_t index, int size, int sizebits)
>> + pgoff_t index, int size, int sizebits, gfp_t movable_mask)
>
> s/movable_mask/gfp/
I got it.
>
>> {
>> struct inode *inode = bdev->bd_inode;
>> struct page *page;
>> @@ -1003,7 +1003,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
>> gfp_t gfp_mask;
>>
>> gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
>> - gfp_mask |= __GFP_MOVABLE;
>> + if (movable_mask & __GFP_MOVABLE)
>> + gfp_mask |= __GFP_MOVABLE;
>
> This becomes
>
> gfp_mask |= gfp;
I got it.
>
>> /*
>> * XXX: __getblk_slow() can not really deal with failure and
>> * will endlessly loop on improvised global reclaim. Prefer
>> @@ -1058,7 +1059,8 @@ failed:
>> * that page was dirty, the buffers are set dirty also.
>> */
>> static int
>> -grow_buffers(struct block_device *bdev, sector_t block, int size)
>> +grow_buffers(struct block_device *bdev, sector_t block,
>> + int size, gfp_t movable_mask)
>
> gfp
>
>> {
>> pgoff_t index;
>> int sizebits;
>> @@ -1085,11 +1087,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
>> }
>>
>> /* Create a page with the proper size buffers.. */
>> - return grow_dev_page(bdev, block, index, size, sizebits);
>> + return grow_dev_page(bdev, block, index, size, sizebits, movable_mask);
>> }
>>
>> static struct buffer_head *
>> -__getblk_slow(struct block_device *bdev, sector_t block, int size)
>> +__getblk_slow(struct block_device *bdev, sector_t block,
>> + int size, gfp_t movable_mask)
>
> gfp
>
>> {
>> /* Size must be multiple of hard sectorsize */
>> if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
>> @@ -1111,7 +1114,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
>> if (bh)
>> return bh;
>>
>> - ret = grow_buffers(bdev, block, size);
>> + ret = grow_buffers(bdev, block, size, movable_mask);
>
> gfp
>
>> if (ret < 0)
>> return NULL;
>> if (ret == 0)
>> @@ -1385,11 +1388,34 @@ __getblk(struct block_device *bdev, sector_t block, unsigned size)
>>
>> might_sleep();
>> if (bh == NULL)
>> - bh = __getblk_slow(bdev, block, size);
>> + bh = __getblk_slow(bdev, block, size, __GFP_MOVABLE);
>
> Here is the place where buffer.c. mentions "movable".
I got it.
>
>> return bh;
>> }
>> EXPORT_SYMBOL(__getblk);
>>
>> + /*
>> + * __getblk_nonmovable will locate (and, if necessary, create) the buffer_head
>> + * which corresponds to the passed block_device, block and size. The
>> + * returned buffer has its reference count incremented.
>> + *
>> + * The page cache is allocated from non-movable area
>> + * not to prevent page migration.
>> + *
>> + * __getblk()_nonmovable will lock up the machine
>> + * if grow_dev_page's try_to_free_buffers() attempt is failing. FIXME, perhaps?
>> + */
>> +struct buffer_head *
>> +__getblk_nonmovable(struct block_device *bdev, sector_t block, unsigned size)
>> +{
>> + struct buffer_head *bh = __find_get_block(bdev, block, size);
>> +
>> + might_sleep();
>> + if (bh == NULL)
>> + bh = __getblk_slow(bdev, block, size, 0);
>> + return bh;
>> +}
>> +EXPORT_SYMBOL(__getblk_nonmovable);
>
> Suggest this be called __getblk_gfp(bdev, block, size, gfp) and then
> __getblk() be changed to call __getblk_gfp(..., __GFP_MOVABLE).
>
> We could then write a __getblk_nonmovable() which calls __getblk_gfp()
> (a static inlined one-line function) or we can just call
> __getblk_gfp(..., 0) directly from filesystems.
I got it.
>
>> @@ -1423,6 +1450,28 @@ __bread(struct block_device *bdev, sector_t block, unsigned size)
>> }
>> EXPORT_SYMBOL(__bread);
>>
>> +/**
>> + * __bread_nonmovable() - reads a specified block and returns the bh
>> + * @bdev: the block_device to read from
>> + * @block: number of block
>> + * @size: size (in bytes) to read
>> + *
>> + * Reads a specified block, and returns buffer head that contains it.
>> + * The page cache is allocated from non-movable area
>> + * not to prevent page migration.
>> + * It returns NULL if the block was unreadable.
>> + */
>> +struct buffer_head *
>> +__bread_nonmovable(struct block_device *bdev, sector_t block, unsigned size)
>> +{
>> + struct buffer_head *bh = __getblk_slow(bdev, block, size, 0);
>> +
>> + if (likely(bh) && !buffer_uptodate(bh))
>> + bh = __bread_slow(bh);
>> + return bh;
>> +}
>> +EXPORT_SYMBOL(__bread_nonmovable);
>
> Treat this in the same fashion as __getblk_nonmovable().
I got it.
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2014-08-18 1:19 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-07-22 5:20 [PATCH 1/2] fs/buffer.c: allocate buffer cache from non-movable area Gioh Kim
-- strict thread matches above, loose matches on Subject: below --
2014-08-14 5:12 [PATCH 0/2] new APIs to allocate buffer-cache for superblock in " Gioh Kim
2014-08-14 5:15 ` [PATCH 1/2] fs/buffer.c: allocate buffer cache from " Gioh Kim
2014-08-14 5:19 ` Gioh Kim
2014-08-14 21:22 ` Andrew Morton
2014-08-18 1:19 ` Gioh Kim
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).