From: akpm@linux-foundation.org
To: mm-commits@vger.kernel.org
Cc: kamezawa.hiroyu@jp.fujitsu.com, balbir@linux.vnet.ibm.com,
gthelen@google.com, minchan.kim@gmail.com,
nishimura@mxp.nes.nec.co.jp
Subject: + memcg-reduce-lock-hold-time-during-charge-moving.patch added to -mm tree
Date: Thu, 07 Oct 2010 16:15:29 -0700 [thread overview]
Message-ID: <201010072315.o97NFT8a030304@imap1.linux-foundation.org> (raw)
The patch titled
memcg: reduce lock hold time during charge moving
has been added to the -mm tree. Its filename is
memcg-reduce-lock-hold-time-during-charge-moving.patch
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/SubmitChecklist when testing your code ***
See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find
out what to do about this
The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/
------------------------------------------------------
Subject: memcg: reduce lock hold time during charge moving
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Presently during task migration among cgroups, memory cgroup scans page
tables and moves accounting if flags are properly set.
The core code, mem_cgroup_move_charge_pte_range(), does:
pte_offset_map_lock();
for all ptes in a page table:
1. look into page table, find_and_get a page
2. remove it from LRU.
3. move charge.
4. putback to LRU. put_page()
pte_offset_map_unlock();
for pte entries on a 3rd level? page table.
This pte_offset_map_lock hold time seems a bit long. This patch modifies the routine as follows:
for 32 pages: pte_offset_map_lock()
find_and_get a page
record it
pte_offset_map_unlock()
for all recorded pages
isolate it from LRU.
move charge
putback to LRU
for all recorded pages
put_page()
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/memcontrol.c | 95 ++++++++++++++++++++++++++++------------------
1 file changed, 59 insertions(+), 36 deletions(-)
diff -puN mm/memcontrol.c~memcg-reduce-lock-hold-time-during-charge-moving mm/memcontrol.c
--- a/mm/memcontrol.c~memcg-reduce-lock-hold-time-during-charge-moving
+++ a/mm/memcontrol.c
@@ -276,6 +276,21 @@ enum move_type {
NR_MOVE_TYPE,
};
+enum mc_target_type {
+ MC_TARGET_NONE, /* used as failure code(0) */
+ MC_TARGET_PAGE,
+ MC_TARGET_SWAP,
+};
+
+struct mc_target {
+ enum mc_target_type type;
+ union {
+ struct page *page;
+ swp_entry_t ent;
+ } val;
+};
+#define MC_MOVE_ONCE (32)
+
/* "mc" and its members are protected by cgroup_mutex */
static struct move_charge_struct {
spinlock_t lock; /* for from, to, moving_task */
@@ -284,6 +299,7 @@ static struct move_charge_struct {
unsigned long precharge;
unsigned long moved_charge;
unsigned long moved_swap;
+ struct mc_target target[MC_MOVE_ONCE];
struct task_struct *moving_task; /* a task moving charges */
wait_queue_head_t waitq; /* a waitq for other context */
} mc = {
@@ -291,6 +307,7 @@ static struct move_charge_struct {
.waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
};
+
static bool move_anon(void)
{
return test_bit(MOVE_CHARGE_TYPE_ANON,
@@ -4479,16 +4496,7 @@ one_by_one:
*
* Called with pte lock held.
*/
-union mc_target {
- struct page *page;
- swp_entry_t ent;
-};
-enum mc_target_type {
- MC_TARGET_NONE, /* not used */
- MC_TARGET_PAGE,
- MC_TARGET_SWAP,
-};
static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent)
@@ -4565,7 +4573,7 @@ static struct page *mc_handle_file_pte(s
}
static int is_target_pte_for_mc(struct vm_area_struct *vma,
- unsigned long addr, pte_t ptent, union mc_target *target)
+ unsigned long addr, pte_t ptent, struct mc_target *target)
{
struct page *page = NULL;
struct page_cgroup *pc;
@@ -4591,7 +4599,7 @@ static int is_target_pte_for_mc(struct v
if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
if (target)
- target->page = page;
+ target->val.page = page;
}
if (!ret || !target)
put_page(page);
@@ -4601,8 +4609,10 @@ static int is_target_pte_for_mc(struct v
css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
ret = MC_TARGET_SWAP;
if (target)
- target->ent = ent;
+ target->val.ent = ent;
}
+ if (target)
+ target->type = ret;
return ret;
}
@@ -4763,26 +4773,42 @@ static int mem_cgroup_move_charge_pte_ra
struct vm_area_struct *vma = walk->private;
pte_t *pte;
spinlock_t *ptl;
+ int index, num;
retry:
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- for (; addr != end; addr += PAGE_SIZE) {
+ for (num = 0; num < MC_MOVE_ONCE && addr != end; addr += PAGE_SIZE) {
pte_t ptent = *(pte++);
- union mc_target target;
- int type;
+ ret = is_target_pte_for_mc(vma, addr, ptent, &mc.target[num]);
+ if (!ret)
+ continue;
+ mc.target[num++].type = ret;
+ }
+ pte_unmap_unlock(pte - 1, ptl);
+ cond_resched();
+
+ ret = 0;
+ index = 0;
+ do {
+ struct mc_target *mt;
struct page *page;
struct page_cgroup *pc;
swp_entry_t ent;
- if (!mc.precharge)
- break;
+ if (!mc.precharge) {
+ ret = mem_cgroup_do_precharge(1);
+ if (ret)
+ goto out;
+ continue;
+ }
+
+ mt = &mc.target[index++];
- type = is_target_pte_for_mc(vma, addr, ptent, &target);
- switch (type) {
+ switch (mt->type) {
case MC_TARGET_PAGE:
- page = target.page;
+ page = mt->val.page;
if (isolate_lru_page(page))
- goto put;
+ break;
pc = lookup_page_cgroup(page);
if (!mem_cgroup_move_account(pc,
mc.from, mc.to, false)) {
@@ -4791,11 +4817,9 @@ retry:
mc.moved_charge++;
}
putback_lru_page(page);
-put: /* is_target_pte_for_mc() gets the page */
- put_page(page);
break;
case MC_TARGET_SWAP:
- ent = target.ent;
+ ent = mt->val.ent;
if (!mem_cgroup_move_swap_account(ent,
mc.from, mc.to, false)) {
mc.precharge--;
@@ -4806,21 +4830,20 @@ put: /* is_target_pte_for_mc() gets th
default:
break;
}
+ } while (index < num);
+out:
+ for (index = 0; index < num; index++) {
+ if (mc.target[index].type == MC_TARGET_PAGE)
+ put_page(mc.target[index].val.page);
+ mc.target[index].type = MC_TARGET_NONE;
}
- pte_unmap_unlock(pte - 1, ptl);
+
+ if (ret)
+ return ret;
cond_resched();
- if (addr != end) {
- /*
- * We have consumed all precharges we got in can_attach().
- * We try charge one by one, but don't do any additional
- * charges to mc.to if we have failed in charge once in attach()
- * phase.
- */
- ret = mem_cgroup_do_precharge(1);
- if (!ret)
- goto retry;
- }
+ if (addr != end)
+ goto retry;
return ret;
}
_
Patches currently in -mm which might be from kamezawa.hiroyu@jp.fujitsu.com are
memcg-fix-thresholds-with-use_hierarchy-==-1.patch
linux-next.patch
vfs-introduce-fmode_neg_offset-for-allowing-negative-f_pos.patch
oom-add-per-mm-oom-disable-count.patch
oom-add-per-mm-oom-disable-count-protect-oom_disable_count-with-task_lock-in-fork.patch
oom-add-per-mm-oom-disable-count-use-old_mm-for-oom_disable_count-in-exec.patch
oom-avoid-killing-a-task-if-a-thread-sharing-its-mm-cannot-be-killed.patch
oom-kill-all-threads-sharing-oom-killed-tasks-mm.patch
oom-kill-all-threads-sharing-oom-killed-tasks-mm-fix.patch
oom-kill-all-threads-sharing-oom-killed-tasks-mm-fix-fix.patch
oom-rewrite-error-handling-for-oom_adj-and-oom_score_adj-tunables.patch
oom-fix-locking-for-oom_adj-and-oom_score_adj.patch
memory-hotplug-fix-notifiers-return-value-check.patch
memory-hotplug-unify-is_removable-and-offline-detection-code.patch
memory-hotplug-unify-is_removable-and-offline-detection-code-checkpatch-fixes.patch
tracing-vmscan-add-trace-events-for-lru-list-shrinking.patch
writeback-account-for-time-spent-congestion_waited.patch
vmscan-synchronous-lumpy-reclaim-should-not-call-congestion_wait.patch
vmscan-narrow-the-scenarios-lumpy-reclaim-uses-synchrounous-reclaim.patch
vmscan-remove-dead-code-in-shrink_inactive_list.patch
vmscan-isolated_lru_pages-stop-neighbour-search-if-neighbour-cannot-be-isolated.patch
writeback-do-not-sleep-on-the-congestion-queue-if-there-are-no-congested-bdis.patch
writeback-do-not-sleep-on-the-congestion-queue-if-there-are-no-congested-bdis-or-if-significant-congestion-is-not-being-encountered-in-the-current-zone.patch
writeback-do-not-sleep-on-the-congestion-queue-if-there-are-no-congested-bdis-or-if-significant-congestion-is-not-being-encounted-in-the-current-zone-fix.patch
memcg-fix-race-in-file_mapped-accouting-flag-management.patch
memcg-avoid-lock-in-updating-file_mapped-was-fix-race-in-file_mapped-accouting-flag-management.patch
memcg-use-for_each_mem_cgroup.patch
memcg-cpu-hotplug-aware-percpu-count-updates.patch
memcg-cpu-hotplug-aware-percpu-count-updates-fix.patch
memcg-cpu-hotplug-aware-quick-acount_move-detection.patch
memcg-cpu-hotplug-aware-quick-acount_move-detection-checkpatch-fixes.patch
memcg-generic-filestat-update-interface.patch
memcg-reduce-lock-hold-time-during-charge-moving.patch
next reply other threads:[~2010-10-07 23:16 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-10-07 23:15 akpm [this message]
2010-10-15 0:47 ` + memcg-reduce-lock-hold-time-during-charge-moving.patch added to -mm tree KAMEZAWA Hiroyuki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201010072315.o97NFT8a030304@imap1.linux-foundation.org \
--to=akpm@linux-foundation.org \
--cc=balbir@linux.vnet.ibm.com \
--cc=gthelen@google.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=minchan.kim@gmail.com \
--cc=mm-commits@vger.kernel.org \
--cc=nishimura@mxp.nes.nec.co.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.