* [PATCH RFC] mm: abort inode pruning if it has active pages
@ 2011-11-16 14:47 Konstantin Khlebnikov
2011-11-18 0:30 ` Andrew Morton
0 siblings, 1 reply; 3+ messages in thread
From: Konstantin Khlebnikov @ 2011-11-16 14:47 UTC (permalink / raw)
To: linux-mm, linux-kernel; +Cc: Andrew Morton
Inode cache pruning can throw out some usefull data from page cache.
This patch aborts inode invalidation and keep inode alive if it still has
active pages. It improves interaction between inode cache and page cache.
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
---
fs/inode.c | 4 ++--
include/linux/fs.h | 2 ++
mm/truncate.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 1f6c48d..8d55a63 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -663,8 +663,8 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan)
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inode_lru_lock);
if (remove_inode_buffers(inode))
- reap += invalidate_mapping_pages(&inode->i_data,
- 0, -1);
+ reap += invalidate_inode_inactive_pages(
+ &inode->i_data, 0, -1);
iput(inode);
spin_lock(&sb->s_inode_lru_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0c4df26..05875d7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2211,6 +2211,8 @@ extern int invalidate_partition(struct gendisk *, int);
#endif
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
+unsigned long invalidate_inode_inactive_pages(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
static inline void invalidate_remote_inode(struct inode *inode)
{
diff --git a/mm/truncate.c b/mm/truncate.c
index 632b15e..ac739bc 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -379,6 +379,52 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
EXPORT_SYMBOL(invalidate_mapping_pages);
/*
+ * This is like invalidate_mapping_pages(),
+ * except it aborts invalidation at the first active page.
+ */
+unsigned long invalidate_inode_inactive_pages(struct address_space *mapping,
+ pgoff_t start, pgoff_t end)
+{
+ struct pagevec pvec;
+ pgoff_t index = start;
+ unsigned long ret;
+ unsigned long count = 0;
+ int i;
+
+ pagevec_init(&pvec, 0);
+ while (index <= end && pagevec_lookup(&pvec, mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+
+ mem_cgroup_uncharge_start();
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
+
+ if (PageActive(page)) {
+ index = end;
+ break;
+ }
+
+ /* We rely upon deletion not changing page->index */
+ index = page->index;
+ if (index > end)
+ break;
+
+ if (!trylock_page(page))
+ continue;
+ WARN_ON(page->index != index);
+ ret = invalidate_inode_page(page);
+ unlock_page(page);
+ count += ret;
+ }
+ pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ cond_resched();
+ index++;
+ }
+ return count;
+}
+
+/*
* This is like invalidate_complete_page(), except it ignores the page's
* refcount. We do this because invalidate_inode_pages2() needs stronger
* invalidation guarantees, and cannot afford to leave pages behind because
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH RFC] mm: abort inode pruning if it has active pages
2011-11-16 14:47 [PATCH RFC] mm: abort inode pruning if it has active pages Konstantin Khlebnikov
@ 2011-11-18 0:30 ` Andrew Morton
2011-11-18 8:14 ` Konstantin Khlebnikov
0 siblings, 1 reply; 3+ messages in thread
From: Andrew Morton @ 2011-11-18 0:30 UTC (permalink / raw)
To: Konstantin Khlebnikov; +Cc: linux-mm, linux-kernel
On Wed, 16 Nov 2011 17:47:47 +0300
Konstantin Khlebnikov <khlebnikov@openvz.org> wrote:
> Inode cache pruning can throw out some usefull data from page cache.
> This patch aborts inode invalidation and keep inode alive if it still has
> active pages.
>
hm, I suppose so.
I also suppose there are various risks related to failing to reclaim
inodes due to ongoing userspace activity and then running out of lowmem
pages.
> It improves interaction between inode cache and page cache.
Well, this is the key part of the patch and it is the thing which we
are most interested in. But you didn't tell us anything about it!
So please, provide us with much more detailed information on the
observed benefits.
>
> diff --git a/fs/inode.c b/fs/inode.c
> index 1f6c48d..8d55a63 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -663,8 +663,8 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan)
> spin_unlock(&inode->i_lock);
> spin_unlock(&sb->s_inode_lru_lock);
> if (remove_inode_buffers(inode))
> - reap += invalidate_mapping_pages(&inode->i_data,
> - 0, -1);
> + reap += invalidate_inode_inactive_pages(
> + &inode->i_data, 0, -1);
> iput(inode);
> spin_lock(&sb->s_inode_lru_lock);
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 0c4df26..05875d7 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2211,6 +2211,8 @@ extern int invalidate_partition(struct gendisk *, int);
> #endif
> unsigned long invalidate_mapping_pages(struct address_space *mapping,
> pgoff_t start, pgoff_t end);
> +unsigned long invalidate_inode_inactive_pages(struct address_space *mapping,
> + pgoff_t start, pgoff_t end);
>
> static inline void invalidate_remote_inode(struct inode *inode)
> {
> diff --git a/mm/truncate.c b/mm/truncate.c
> index 632b15e..ac739bc 100644
> --- a/mm/truncate.c
> +++ b/mm/truncate.c
> @@ -379,6 +379,52 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
> EXPORT_SYMBOL(invalidate_mapping_pages);
>
> /*
> + * This is like invalidate_mapping_pages(),
> + * except it aborts invalidation at the first active page.
> + */
> +unsigned long invalidate_inode_inactive_pages(struct address_space *mapping,
> + pgoff_t start, pgoff_t end)
> +{
> + struct pagevec pvec;
> + pgoff_t index = start;
> + unsigned long ret;
> + unsigned long count = 0;
> + int i;
> +
> + pagevec_init(&pvec, 0);
> + while (index <= end && pagevec_lookup(&pvec, mapping, index,
> + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
> +
> + mem_cgroup_uncharge_start();
> + for (i = 0; i < pagevec_count(&pvec); i++) {
> + struct page *page = pvec.pages[i];
> +
> + if (PageActive(page)) {
> + index = end;
> + break;
> + }
> +
> + /* We rely upon deletion not changing page->index */
> + index = page->index;
> + if (index > end)
> + break;
> +
> + if (!trylock_page(page))
> + continue;
> + WARN_ON(page->index != index);
> + ret = invalidate_inode_page(page);
> + unlock_page(page);
> + count += ret;
> + }
> + pagevec_release(&pvec);
> + mem_cgroup_uncharge_end();
> + cond_resched();
> + index++;
> + }
> + return count;
> +}
We shouldn't just copy-n-paste invalidate_mapping_pages() like this.
Can't we share the function by passing in a pointer to a callback
function (invalidate_inode_page or a new
invalidate_inode_page_unless_it_is_active).
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH RFC] mm: abort inode pruning if it has active pages
2011-11-18 0:30 ` Andrew Morton
@ 2011-11-18 8:14 ` Konstantin Khlebnikov
0 siblings, 0 replies; 3+ messages in thread
From: Konstantin Khlebnikov @ 2011-11-18 8:14 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Andrew Morton wrote:
> On Wed, 16 Nov 2011 17:47:47 +0300
> Konstantin Khlebnikov<khlebnikov@openvz.org> wrote:
>
>> Inode cache pruning can throw out some usefull data from page cache.
>> This patch aborts inode invalidation and keep inode alive if it still has
>> active pages.
>>
>
> hm, I suppose so.
>
> I also suppose there are various risks related to failing to reclaim
> inodes due to ongoing userspace activity and then running out of lowmem
> pages.
Ok, I think we can bypass active-page protection if CONFIG_HIGHMEM=y and
there is no __GFP_HIGHMEM in gfp_mask.
>
>> It improves interaction between inode cache and page cache.
>
> Well, this is the key part of the patch and it is the thing which we
> are most interested in. But you didn't tell us anything about it!
>
> So please, provide us with much more detailed information on the
> observed benefits.
Currently this is based only on thought experiment.
I think rising pginodesteal and kswapd_inodesteal in /proc/vstat is sign of
inefficient memory reclaiming, because page-cache lru has a much more detailed
information about memory activity.
I plan to run some containers related tests/benchmarks,
something like multiple heavy-loaded web-servers.
>
>>
>> diff --git a/fs/inode.c b/fs/inode.c
>> index 1f6c48d..8d55a63 100644
>> --- a/fs/inode.c
>> +++ b/fs/inode.c
>> @@ -663,8 +663,8 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan)
>> spin_unlock(&inode->i_lock);
>> spin_unlock(&sb->s_inode_lru_lock);
>> if (remove_inode_buffers(inode))
>> - reap += invalidate_mapping_pages(&inode->i_data,
>> - 0, -1);
>> + reap += invalidate_inode_inactive_pages(
>> + &inode->i_data, 0, -1);
>> iput(inode);
>> spin_lock(&sb->s_inode_lru_lock);
>>
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 0c4df26..05875d7 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -2211,6 +2211,8 @@ extern int invalidate_partition(struct gendisk *, int);
>> #endif
>> unsigned long invalidate_mapping_pages(struct address_space *mapping,
>> pgoff_t start, pgoff_t end);
>> +unsigned long invalidate_inode_inactive_pages(struct address_space *mapping,
>> + pgoff_t start, pgoff_t end);
>>
>> static inline void invalidate_remote_inode(struct inode *inode)
>> {
>> diff --git a/mm/truncate.c b/mm/truncate.c
>> index 632b15e..ac739bc 100644
>> --- a/mm/truncate.c
>> +++ b/mm/truncate.c
>> @@ -379,6 +379,52 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
>> EXPORT_SYMBOL(invalidate_mapping_pages);
>>
>> /*
>> + * This is like invalidate_mapping_pages(),
>> + * except it aborts invalidation at the first active page.
>> + */
>> +unsigned long invalidate_inode_inactive_pages(struct address_space *mapping,
>> + pgoff_t start, pgoff_t end)
>> +{
>> + struct pagevec pvec;
>> + pgoff_t index = start;
>> + unsigned long ret;
>> + unsigned long count = 0;
>> + int i;
>> +
>> + pagevec_init(&pvec, 0);
>> + while (index<= end&& pagevec_lookup(&pvec, mapping, index,
>> + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
>> +
>> + mem_cgroup_uncharge_start();
>> + for (i = 0; i< pagevec_count(&pvec); i++) {
>> + struct page *page = pvec.pages[i];
>> +
>> + if (PageActive(page)) {
>> + index = end;
>> + break;
>> + }
>> +
>> + /* We rely upon deletion not changing page->index */
>> + index = page->index;
>> + if (index> end)
>> + break;
>> +
>> + if (!trylock_page(page))
>> + continue;
>> + WARN_ON(page->index != index);
>> + ret = invalidate_inode_page(page);
>> + unlock_page(page);
>> + count += ret;
>> + }
>> + pagevec_release(&pvec);
>> + mem_cgroup_uncharge_end();
>> + cond_resched();
>> + index++;
>> + }
>> + return count;
>> +}
>
> We shouldn't just copy-n-paste invalidate_mapping_pages() like this.
> Can't we share the function by passing in a pointer to a callback
> function (invalidate_inode_page or a new
> invalidate_inode_page_unless_it_is_active).
>
Ok, I'll think how to implement this more accurate.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2011-11-18 8:14 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-11-16 14:47 [PATCH RFC] mm: abort inode pruning if it has active pages Konstantin Khlebnikov
2011-11-18 0:30 ` Andrew Morton
2011-11-18 8:14 ` Konstantin Khlebnikov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).