From: j.glisse@gmail.com
Subject: [PATCH 05/11] mm/memcg: support accounting null page and transferring null charge to new page
Date: Fri, 2 May 2014 09:52:04 -0400
Message-ID: <1399038730-25641-6-git-send-email-j.glisse@gmail.com>
In-Reply-To: <1399038730-25641-1-git-send-email-j.glisse@gmail.com>
References: <1399038730-25641-1-git-send-email-j.glisse@gmail.com>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Cc: Jérôme Glisse
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org

From: Jérôme Glisse

When migrating memory to some device-specific memory we still want to
properly account memcg memory usage. To do so we need to be able to account
for pages that are not allocated in system memory. We also need to be able
to transfer a previous charge from device memory to a page in a way that is
atomic from the memcg point of view.

Also introduce a helper function to clear a page's memcg.

Signed-off-by: Jérôme Glisse
---
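Usage sketch for the charge half of this interface (illustrative only, not
part of the patch and not meant to be committed): the rmem_* functions and
the surrounding migration flow are hypothetical; only
mem_cgroup_charge_anon() with a NULL page and
mem_cgroup_transfer_charge_anon() are what this patch adds or extends.

  #include <linux/mm.h>
  #include <linux/memcontrol.h>

  /*
   * Hypothetical migrate-to-device path: the data leaves system memory,
   * so there is no struct page to charge. Passing page == NULL still
   * accounts one anonymous page against mm's memcg.
   */
  static int rmem_charge_device_copy(struct mm_struct *mm, gfp_t gfp_mask)
  {
          if (mem_cgroup_charge_anon(NULL, mm, gfp_mask))
                  return -ENOMEM;
          /* ... copy the data to device memory, update the CPU page table ... */
          return 0;
  }

  /*
   * Hypothetical migrate-back path: the data returns into a freshly
   * allocated, not yet charged system page. Instead of charging it
   * again, move the charge taken above onto new_page; the memcg usage
   * counters do not change at this point.
   */
  static void rmem_transfer_to_new_page(struct mm_struct *mm,
                                        struct page *new_page)
  {
          mem_cgroup_transfer_charge_anon(new_page, mm);
          /* ... copy the data back from device memory and map new_page ... */
  }
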
 include/linux/memcontrol.h |  17 +++++
 mm/memcontrol.c            | 161 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 159 insertions(+), 19 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1fa2324..1737323 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -67,6 +67,8 @@ struct mem_cgroup_reclaim_cookie {
 
 extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
                                 gfp_t gfp_mask);
+extern void mem_cgroup_transfer_charge_anon(struct page *page,
+                                            struct mm_struct *mm);
 /* for swap handling */
 extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
                 struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
@@ -85,6 +87,8 @@ extern void mem_cgroup_uncharge_start(void);
 extern void mem_cgroup_uncharge_end(void);
 
 extern void mem_cgroup_uncharge_page(struct page *page);
+extern void mem_cgroup_uncharge_mm(struct mm_struct *mm);
+extern void mem_cgroup_clear_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
 
 bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
@@ -245,6 +249,11 @@ static inline int mem_cgroup_charge_file(struct page *page,
         return 0;
 }
 
+static inline void mem_cgroup_transfer_charge_anon(struct page *page,
+                                                   struct mm_struct *mm)
+{
+}
+
 static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
                 struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
 {
@@ -272,6 +281,14 @@ static inline void mem_cgroup_uncharge_page(struct page *page)
 {
 }
 
+static inline void mem_cgroup_uncharge_mm(struct mm_struct *mm)
+{
+}
+
+static inline void mem_cgroup_clear_page(struct page *page)
+{
+}
+
 static inline void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 19d620b..ceaf4d7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -940,7 +940,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
                 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
                                 nr_pages);
 
-        if (PageTransHuge(page))
+        if (page && PageTransHuge(page))
                 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
                                 nr_pages);
 
@@ -2842,12 +2842,17 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                                        enum charge_type ctype,
                                        bool lrucare)
 {
-        struct page_cgroup *pc = lookup_page_cgroup(page);
+        struct page_cgroup *pc;
         struct zone *uninitialized_var(zone);
         struct lruvec *lruvec;
         bool was_on_lru = false;
         bool anon;
 
+        if (page == NULL) {
+                goto charge;
+        }
+
+        pc = lookup_page_cgroup(page);
         lock_page_cgroup(pc);
         VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
         /*
@@ -2891,20 +2896,24 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                 spin_unlock_irq(&zone->lru_lock);
         }
 
+charge:
         if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
                 anon = true;
         else
                 anon = false;
 
         mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
-        unlock_page_cgroup(pc);
 
-        /*
-         * "charge_statistics" updated event counter. Then, check it.
-         * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
-         * if they exceeds softlimit.
-         */
-        memcg_check_events(memcg, page);
+        if (page) {
+                unlock_page_cgroup(pc);
+
+                /*
+                 * "charge_statistics" updated event counter. Then, check it.
+                 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+                 * if they exceeds softlimit.
+                 */
+                memcg_check_events(memcg, page);
+        }
 }
 
 static DEFINE_MUTEX(set_limit_mutex);
@@ -3745,20 +3754,23 @@ int mem_cgroup_charge_anon(struct page *page,
         if (mem_cgroup_disabled())
                 return 0;
 
-        VM_BUG_ON_PAGE(page_mapped(page), page);
-        VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
         VM_BUG_ON(!mm);
+        if (page) {
+                VM_BUG_ON_PAGE(page_mapped(page), page);
+                VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
 
-        if (PageTransHuge(page)) {
-                nr_pages <<= compound_order(page);
-                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-                /*
-                 * Never OOM-kill a process for a huge page.  The
-                 * fault handler will fall back to regular pages.
-                 */
-                oom = false;
+                if (PageTransHuge(page)) {
+                        nr_pages <<= compound_order(page);
+                        VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+                        /*
+                         * Never OOM-kill a process for a huge page.  The
+                         * fault handler will fall back to regular pages.
+                         */
+                        oom = false;
+                }
         }
 
+
         memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
         if (!memcg)
                 return -ENOMEM;
@@ -3767,6 +3779,60 @@ int mem_cgroup_charge_anon(struct page *page,
         return 0;
 }
 
+void mem_cgroup_transfer_charge_anon(struct page *page, struct mm_struct *mm)
+{
+        struct page_cgroup *pc;
+        struct task_struct *task;
+        struct mem_cgroup *memcg;
+        struct zone *uninitialized_var(zone);
+
+        if (mem_cgroup_disabled())
+                return;
+
+        VM_BUG_ON(page->mapping && !PageAnon(page));
+        VM_BUG_ON(!mm);
+
+        rcu_read_lock();
+        task = rcu_dereference(mm->owner);
+        /*
+         * Because we don't have task_lock(), "p" can exit.
+         * In that case, "memcg" can point to root or p can be NULL with
+         * race with swapoff. Then, we have small risk of mis-accounting.
+         * But such kind of mis-account by race always happens because
+         * we don't have cgroup_mutex(). It's overkill and we allow that
+         * small race, here.
+         * (*) swapoff et al will charge against mm-struct not against
+         * task-struct. So, mm->owner can be NULL.
+         */
+        memcg = mem_cgroup_from_task(task);
+        if (!memcg) {
+                memcg = root_mem_cgroup;
+        }
+        rcu_read_unlock();
+
+        pc = lookup_page_cgroup(page);
+        lock_page_cgroup(pc);
+        VM_BUG_ON(PageCgroupUsed(pc));
+        /*
+         * we don't need page_cgroup_lock about tail pages, because they are not
+         * accessed by any other context at this point.
+         */
+
+        pc->mem_cgroup = memcg;
+        /*
+         * We access a page_cgroup asynchronously without lock_page_cgroup().
+         * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
+         * is accessed after testing USED bit. To make pc->mem_cgroup visible
+         * before USED bit, we need memory barrier here.
+         * See mem_cgroup_add_lru_list(), etc.
+         */
+        smp_wmb();
+        SetPageCgroupUsed(pc);
+
+        unlock_page_cgroup(pc);
+        memcg_check_events(memcg, page);
+}
+
 /*
  * While swap-in, try_charge -> commit or cancel, the page is locked.
  * And when try_charge() successfully returns, one refcnt to memcg without
@@ -4087,6 +4153,63 @@ void mem_cgroup_uncharge_page(struct page *page)
         __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
 }
 
+void mem_cgroup_uncharge_mm(struct mm_struct *mm)
+{
+        struct mem_cgroup *memcg;
+        struct task_struct *task;
+
+        if (mem_cgroup_disabled())
+                return;
+
+        VM_BUG_ON(!mm);
+
+        rcu_read_lock();
+        task = rcu_dereference(mm->owner);
+        /*
+         * Because we don't have task_lock(), "p" can exit.
+         * In that case, "memcg" can point to root or p can be NULL with
+         * race with swapoff. Then, we have small risk of mis-accounting.
+         * But such kind of mis-account by race always happens because
+         * we don't have cgroup_mutex(). It's overkill and we allow that
+         * small race, here.
+         * (*) swapoff et al will charge against mm-struct not against
+         * task-struct. So, mm->owner can be NULL.
+         */
+        memcg = mem_cgroup_from_task(task);
+        if (!memcg) {
+                memcg = root_mem_cgroup;
+        }
+        rcu_read_unlock();
+
+        mem_cgroup_charge_statistics(memcg, NULL, true, -1);
+        if (!mem_cgroup_is_root(memcg))
+                mem_cgroup_do_uncharge(memcg, 1, MEM_CGROUP_CHARGE_TYPE_ANON);
+}
+
+void mem_cgroup_clear_page(struct page *page)
+{
+        struct page_cgroup *pc;
+
+        if (mem_cgroup_disabled())
+                return;
+
+        /*
+         * Check if our page_cgroup is valid
+         */
+        pc = lookup_page_cgroup(page);
+        if (unlikely(!PageCgroupUsed(pc)))
+                return;
+        lock_page_cgroup(pc);
+        ClearPageCgroupUsed(pc);
+        /*
+         * pc->mem_cgroup is not cleared here. It will be accessed when it's
+         * freed from LRU. This is safe because uncharged page is expected not
+         * to be reused (freed soon). Exception is SwapCache, it's handled by
+         * special functions.
+         */
+        unlock_page_cgroup(pc);
+}
+
 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
         VM_BUG_ON_PAGE(page_mapped(page), page);
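
For reference, the release half is sketched below. This is likewise
illustrative only, not part of the patch; the rmem_* names are made up and
only mem_cgroup_uncharge_mm() and mem_cgroup_clear_page() come from the diff
above. The pairing is the point: a charge taken with a NULL page has to end
up either transferred onto a real page or dropped through
mem_cgroup_uncharge_mm(), otherwise it is leaked.

  #include <linux/mm.h>
  #include <linux/memcontrol.h>

  /*
   * Hypothetical: the per-mm device copy is discarded without ever being
   * migrated back to a system page, so drop the charge that was taken
   * against mm when the NULL page was charged.
   */
  static void rmem_free_device_copy(struct mm_struct *mm)
  {
          mem_cgroup_uncharge_mm(mm);
  }

  /*
   * Hypothetical: this page's charge has already been moved elsewhere
   * (for instance it is now accounted as device memory), so only clear
   * the PageCgroupUsed bit. The counters are left alone and a later
   * generic uncharge of the page should then be a no-op.
   */
  static void rmem_disown_page(struct page *page)
  {
          mem_cgroup_clear_page(page);
  }
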
-- 
1.9.0