From: Mel Gorman <mgorman@techsingularity.net>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.cz>, Minchan Kim <minchan@kernel.org>,
Vladimir Davydov <vdavydov@virtuozzo.com>,
Dave Chinner <david@fromorbit.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Vlastimil Babka <vbabka@suse.cz>,
Andrew Morton <akpm@linux-foundation.org>,
Bob Peterson <rpeterso@redhat.com>,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
"Huang, Ying" <ying.huang@intel.com>,
Christoph Hellwig <hch@lst.de>,
Wu Fengguang <fengguang.wu@intel.com>, LKP <lkp@01.org>,
Tejun Heo <tj@kernel.org>, LKML <linux-kernel@vger.kernel.org>
Subject: Re: [LKP] [lkp] [xfs] 68a9f5e700: aim7.jobs-per-min -13.6% regression
Date: Thu, 18 Aug 2016 01:45:17 +0100 [thread overview]
Message-ID: <20160818004517.GJ8119@techsingularity.net> (raw)
In-Reply-To: <20160817154907.GI8119@techsingularity.net>
On Wed, Aug 17, 2016 at 04:49:07PM +0100, Mel Gorman wrote:
> > Yes, we could try to batch the locking like DaveC already suggested
> > (ie we could move the locking to the caller, and then make
> > shrink_page_list() just try to keep the lock held for a few pages if
> > the mapping doesn't change), and that might result in fewer crazy
> > cacheline ping-pongs overall. But that feels like exactly the wrong
> > kind of workaround.
> >
>
> Even if such batching was implemented, it would be very specific to the
> case of a single large file filling LRUs on multiple nodes.
>
The latest Jason Bourne movie was sufficiently bad that I spent time
thinking how the tree_lock could be batched during reclaim. It's not
straight-forward but this prototype did not blow up on UMA and may be
worth considering if Dave can test either approach has a positive impact.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 374d95d04178..926110219cd9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -621,19 +621,39 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
return PAGE_CLEAN;
}
+static void finalise_remove_mapping(struct list_head *swapcache,
+ struct list_head *filecache,
+ void (*freepage)(struct page *))
+{
+ struct page *page;
+
+ while (!list_empty(swapcache)) {
+ swp_entry_t swap = { .val = page_private(page) };
+ page = lru_to_page(swapcache);
+ list_del(&page->lru);
+ swapcache_free(swap);
+ set_page_private(page, 0);
+ }
+
+ while (!list_empty(filecache)) {
+ page = lru_to_page(swapcache);
+ list_del(&page->lru);
+ freepage(page);
+ }
+}
+
/*
* Same as remove_mapping, but if the page is removed from the mapping, it
* gets returned with a refcount of 0.
*/
-static int __remove_mapping(struct address_space *mapping, struct page *page,
- bool reclaimed)
+static int __remove_mapping_page(struct address_space *mapping,
+ struct page *page, bool reclaimed,
+ struct list_head *swapcache,
+ struct list_head *filecache)
{
- unsigned long flags;
-
BUG_ON(!PageLocked(page));
BUG_ON(mapping != page_mapping(page));
- spin_lock_irqsave(&mapping->tree_lock, flags);
/*
* The non racy check for a busy page.
*
@@ -668,16 +688,18 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
}
if (PageSwapCache(page)) {
- swp_entry_t swap = { .val = page_private(page) };
+ unsigned long swapval = page_private(page);
+ swp_entry_t swap = { .val = swapval };
mem_cgroup_swapout(page, swap);
__delete_from_swap_cache(page);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
- swapcache_free(swap);
+ set_page_private(page, swapval);
+ list_add(&page->lru, swapcache);
} else {
- void (*freepage)(struct page *);
void *shadow = NULL;
+ void (*freepage)(struct page *);
freepage = mapping->a_ops->freepage;
+
/*
* Remember a shadow entry for reclaimed file cache in
* order to detect refaults, thus thrashing, later on.
@@ -698,16 +720,13 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
!mapping_exiting(mapping) && !dax_mapping(mapping))
shadow = workingset_eviction(mapping, page);
__delete_from_page_cache(page, shadow);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
-
- if (freepage != NULL)
- freepage(page);
+ if (freepage)
+ list_add(&page->lru, filecache);
}
return 1;
cannot_free:
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
return 0;
}
@@ -719,16 +738,68 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
*/
int remove_mapping(struct address_space *mapping, struct page *page)
{
- if (__remove_mapping(mapping, page, false)) {
+ unsigned long flags;
+ LIST_HEAD(swapcache);
+ LIST_HEAD(filecache);
+ void (*freepage)(struct page *);
+ int ret = 0;
+
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ freepage = mapping->a_ops->freepage;
+
+ if (__remove_mapping_page(mapping, page, false, &swapcache, &filecache)) {
/*
* Unfreezing the refcount with 1 rather than 2 effectively
* drops the pagecache ref for us without requiring another
* atomic operation.
*/
page_ref_unfreeze(page, 1);
- return 1;
+ ret = 1;
+ }
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ finalise_remove_mapping(&swapcache, &filecache, freepage);
+ return ret;
+}
+
+static void remove_mapping_list(struct list_head *mapping_list,
+ struct list_head *free_pages,
+ struct list_head *ret_pages)
+{
+ unsigned long flags;
+ struct address_space *mapping = NULL;
+ void (*freepage)(struct page *);
+ LIST_HEAD(swapcache);
+ LIST_HEAD(filecache);
+ struct page *page;
+
+ while (!list_empty(mapping_list)) {
+ page = lru_to_page(mapping_list);
+ list_del(&page->lru);
+
+ if (!mapping || page->mapping != mapping) {
+ if (mapping) {
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ finalise_remove_mapping(&swapcache, &filecache, freepage);
+ }
+
+ mapping = page->mapping;
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ freepage = mapping->a_ops->freepage;
+ }
+
+ if (!__remove_mapping_page(mapping, page, true, &swapcache, &filecache)) {
+ unlock_page(page);
+ list_add(&page->lru, ret_pages);
+ } else {
+ __ClearPageLocked(page);
+ list_add(&page->lru, free_pages);
+ }
+ }
+
+ if (mapping) {
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ finalise_remove_mapping(&swapcache, &filecache, freepage);
}
- return 0;
}
/**
@@ -910,6 +981,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
{
LIST_HEAD(ret_pages);
LIST_HEAD(free_pages);
+ LIST_HEAD(mapping_pages);
int pgactivate = 0;
unsigned long nr_unqueued_dirty = 0;
unsigned long nr_dirty = 0;
@@ -1206,17 +1278,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
}
lazyfree:
- if (!mapping || !__remove_mapping(mapping, page, true))
+ if (!mapping)
goto keep_locked;
- /*
- * At this point, we have no other references and there is
- * no way to pick any more up (removed from LRU, removed
- * from pagecache). Can use non-atomic bitops now (and
- * we obviously don't have to worry about waking up a process
- * waiting on the page lock, because there are no references.
- */
- __ClearPageLocked(page);
+ list_add(&page->lru, &mapping_pages);
+ if (ret == SWAP_LZFREE)
+ count_vm_event(PGLAZYFREED);
+ continue;
+
free_it:
if (ret == SWAP_LZFREE)
count_vm_event(PGLAZYFREED);
@@ -1251,6 +1320,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
}
+ remove_mapping_list(&mapping_pages, &free_pages, &ret_pages);
mem_cgroup_uncharge_list(&free_pages);
try_to_unmap_flush();
free_hot_cold_page_list(&free_pages, true);
next prev parent reply other threads:[~2016-08-18 0:45 UTC|newest]
Thread overview: 109+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-08-09 14:33 [lkp] [xfs] 68a9f5e700: aim7.jobs-per-min -13.6% regression kernel test robot
2016-08-10 18:24 ` Linus Torvalds
2016-08-10 23:08 ` Dave Chinner
2016-08-10 23:51 ` Linus Torvalds
2016-08-10 23:58 ` [LKP] " Huang, Ying
2016-08-11 0:11 ` Huang, Ying
2016-08-11 0:23 ` Linus Torvalds
2016-08-11 0:33 ` Huang, Ying
2016-08-11 1:00 ` Linus Torvalds
2016-08-11 4:46 ` Dave Chinner
2016-08-15 17:22 ` Huang, Ying
2016-08-16 0:08 ` Dave Chinner
2016-08-11 15:57 ` Christoph Hellwig
2016-08-11 16:55 ` Linus Torvalds
2016-08-11 17:51 ` Huang, Ying
2016-08-11 19:51 ` Linus Torvalds
2016-08-11 20:00 ` Christoph Hellwig
2016-08-11 20:35 ` Linus Torvalds
2016-08-11 22:16 ` Al Viro
2016-08-11 22:30 ` Linus Torvalds
2016-08-11 21:16 ` Huang, Ying
2016-08-11 21:40 ` Linus Torvalds
2016-08-11 22:08 ` Christoph Hellwig
2016-08-12 0:54 ` Dave Chinner
2016-08-12 2:23 ` Dave Chinner
2016-08-12 2:32 ` Linus Torvalds
2016-08-12 2:52 ` Christoph Hellwig
2016-08-12 3:20 ` Linus Torvalds
2016-08-12 4:16 ` Dave Chinner
2016-08-12 5:02 ` Linus Torvalds
2016-08-12 6:04 ` Dave Chinner
2016-08-12 6:29 ` Ye Xiaolong
2016-08-12 8:51 ` Ye Xiaolong
2016-08-12 10:02 ` Dave Chinner
2016-08-12 10:43 ` Fengguang Wu
2016-08-13 0:30 ` [LKP] [lkp] " Christoph Hellwig
2016-08-13 21:48 ` Christoph Hellwig
2016-08-13 22:07 ` Fengguang Wu
2016-08-13 22:15 ` Christoph Hellwig
2016-08-13 22:51 ` Fengguang Wu
2016-08-14 14:50 ` Fengguang Wu
2016-08-14 16:17 ` Christoph Hellwig
2016-08-14 23:46 ` Dave Chinner
2016-08-14 23:57 ` Fengguang Wu
2016-08-15 14:14 ` Fengguang Wu
2016-08-15 21:22 ` Dave Chinner
2016-08-16 12:20 ` Fengguang Wu
2016-08-15 20:30 ` Huang, Ying
2016-08-22 22:09 ` Huang, Ying
2016-09-26 6:25 ` Huang, Ying
2016-09-26 14:55 ` Christoph Hellwig
2016-09-27 0:52 ` Huang, Ying
2016-08-16 13:25 ` Fengguang Wu
2016-08-13 23:32 ` Dave Chinner
2016-08-12 2:27 ` Linus Torvalds
2016-08-12 3:56 ` Dave Chinner
2016-08-12 18:03 ` Linus Torvalds
2016-08-13 23:58 ` Fengguang Wu
2016-08-15 0:48 ` Dave Chinner
2016-08-15 1:37 ` Linus Torvalds
2016-08-15 2:28 ` Dave Chinner
2016-08-15 2:53 ` Linus Torvalds
2016-08-15 5:00 ` Dave Chinner
[not found] ` <CA+55aFwva2Xffai+Eqv1Jn_NGryk3YJ2i5JoHOQnbQv6qVPAsw@mail.gmail.com>
[not found] ` <CA+55aFy14nUnJQ_GdF=j8Fa9xiH70c6fY2G3q5HQ01+8z1z3qQ@mail.gmail.com>
[not found] ` <CA+55aFxp+rLehC8c157uRbH459wUC1rRPfCVgvmcq5BrG9gkyg@mail.gmail.com>
2016-08-15 22:22 ` Dave Chinner
2016-08-15 22:42 ` Dave Chinner
2016-08-15 23:20 ` Linus Torvalds
2016-08-15 23:48 ` Linus Torvalds
2016-08-16 0:44 ` Dave Chinner
2016-08-16 15:05 ` Mel Gorman
2016-08-16 17:47 ` Linus Torvalds
2016-08-17 15:48 ` Michal Hocko
2016-08-17 16:42 ` Michal Hocko
2016-08-17 15:49 ` Mel Gorman
2016-08-18 0:45 ` Mel Gorman [this message]
2016-08-18 7:11 ` Dave Chinner
2016-08-18 13:24 ` Mel Gorman
2016-08-18 17:55 ` Linus Torvalds
2016-08-18 21:19 ` Dave Chinner
2016-08-18 22:25 ` Linus Torvalds
2016-08-19 9:00 ` Michal Hocko
2016-08-19 10:49 ` Mel Gorman
2016-08-19 23:48 ` Dave Chinner
2016-08-20 1:08 ` Linus Torvalds
2016-08-20 12:16 ` Mel Gorman
2016-08-19 15:08 ` Mel Gorman
2016-09-01 23:32 ` Dave Chinner
2016-09-06 15:37 ` Mel Gorman
2016-09-06 15:52 ` Huang, Ying
2016-08-24 15:40 ` Huang, Ying
2016-08-25 9:37 ` Mel Gorman
2016-08-18 2:44 ` Dave Chinner
2016-08-16 0:15 ` Linus Torvalds
2016-08-16 0:38 ` Dave Chinner
2016-08-16 0:50 ` Linus Torvalds
2016-08-16 0:19 ` Dave Chinner
2016-08-16 1:51 ` Linus Torvalds
2016-08-16 22:02 ` Dave Chinner
2016-08-16 23:23 ` Linus Torvalds
2016-08-15 23:01 ` Linus Torvalds
2016-08-16 0:17 ` Dave Chinner
2016-08-16 0:45 ` Linus Torvalds
2016-08-15 5:03 ` Ingo Molnar
2016-08-17 16:24 ` Peter Zijlstra
2016-08-15 12:58 ` Fengguang Wu
2016-08-11 1:16 ` Dave Chinner
2016-08-11 1:32 ` Dave Chinner
2016-08-11 2:36 ` Ye Xiaolong
2016-08-11 3:05 ` Dave Chinner
2016-08-12 1:26 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160818004517.GJ8119@techsingularity.net \
--to=mgorman@techsingularity.net \
--cc=akpm@linux-foundation.org \
--cc=david@fromorbit.com \
--cc=fengguang.wu@intel.com \
--cc=hannes@cmpxchg.org \
--cc=hch@lst.de \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lkp@01.org \
--cc=mhocko@suse.cz \
--cc=minchan@kernel.org \
--cc=rpeterso@redhat.com \
--cc=tj@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=vbabka@suse.cz \
--cc=vdavydov@virtuozzo.com \
--cc=ying.huang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox