* [PATCH -mm v2 2/2] mm: get rid of __GFP_KMEMCG
@ 2014-04-01 7:38 ` Vladimir Davydov
0 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-01 7:38 UTC (permalink / raw)
To: akpm; +Cc: hannes, mhocko, glommer, gthelen, linux-kernel, linux-mm, devel
Currently to allocate a page that should be charged to kmemcg (e.g.
threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
allocated is then to be freed by free_memcg_kmem_pages. Apart from
looking asymmetrical, this also requires intrusion to the general
allocation path. So let's introduce separate functions that will
alloc/free pages charged to kmemcg.
The new functions are called alloc_kmem_pages and free_kmem_pages. They
should be used when the caller actually would like to use kmalloc, but
has to fall back to the page allocator for the allocation is large. They
only differ from alloc_pages and free_pages in that besides allocating
or freeing pages they also charge them to the kmem resource counter of
the current memory cgroup.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
---
include/linux/gfp.h | 10 ++++---
include/linux/memcontrol.h | 2 +-
include/linux/slab.h | 11 --------
include/linux/thread_info.h | 2 --
include/trace/events/gfpflags.h | 1 -
kernel/fork.c | 6 ++---
mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
mm/slab_common.c | 12 +++++++++
mm/slub.c | 6 ++---
9 files changed, 60 insertions(+), 46 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 39b81dc7d01a..d382db71e300 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -31,7 +31,6 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u
-#define ___GFP_KMEMCG 0x100000u
#define ___GFP_NOTRACK 0x200000u
#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
@@ -91,7 +90,6 @@ struct vm_area_struct;
#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
-#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
/*
@@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
+extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
+ unsigned int order);
+
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
@@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);
-extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
-extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+extern void __free_kmem_pages(struct page *page, unsigned int order);
+extern void free_kmem_pages(unsigned long addr, unsigned int order);
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 29068dd26c3d..13acdb5259f5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
* res_counter_charge_nofail, but we hope those allocations are rare,
* and won't be worth the trouble.
*/
- if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+ if (gfp & __GFP_NOFAIL)
return true;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return true;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3dd389aa91c7..6d6959292e00 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -358,17 +358,6 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
#include <linux/slub_def.h>
#endif
-static __always_inline void *
-kmalloc_order(size_t size, gfp_t flags, unsigned int order)
-{
- void *ret;
-
- flags |= (__GFP_COMP | __GFP_KMEMCG);
- ret = (void *) __get_free_pages(flags, order);
- kmemleak_alloc(ret, size, 1, flags);
- return ret;
-}
-
#ifdef CONFIG_TRACING
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
#else
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index fddbe2023a5d..1807bb194816 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
#endif
-#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
-
/*
* flag set/clear/test wrappers
* - pass TIF_xxxx constants to these functions
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 1eddbf1557f2..d6fd8e5b14b7 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,7 +34,6 @@
{(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
{(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
{(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
- {(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \
{(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
{(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
{(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
diff --git a/kernel/fork.c b/kernel/fork.c
index f4b09bc15f3a..a1632c878037 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node)
{
- struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
- THREAD_SIZE_ORDER);
+ struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
+ THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
}
static inline void free_thread_info(struct thread_info *ti)
{
- free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+ free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_info_cache;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0327f9d5a8c0..41378986a1e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2723,7 +2723,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
int migratetype = allocflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
- struct mem_cgroup *memcg = NULL;
gfp_mask &= gfp_allowed_mask;
@@ -2742,13 +2741,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (unlikely(!zonelist->_zonerefs->zone))
return NULL;
- /*
- * Will only have any effect when __GFP_KMEMCG is set. This is
- * verified in the (always inline) callee
- */
- if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
- return NULL;
-
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
@@ -2810,8 +2802,6 @@ out:
if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset;
- memcg_kmem_commit_charge(page, memcg, order);
-
if (page)
set_page_owner(page, order, gfp_mask);
@@ -2868,27 +2858,51 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
/*
- * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
- * pages allocated with __GFP_KMEMCG.
+ * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
+ * of the current memory cgroup.
*
- * Those pages are accounted to a particular memcg, embedded in the
- * corresponding page_cgroup. To avoid adding a hit in the allocator to search
- * for that information only to find out that it is NULL for users who have no
- * interest in that whatsoever, we provide these functions.
- *
- * The caller knows better which flags it relies on.
+ * It should be used when the caller would like to use kmalloc, but since the
+ * allocation is large, it has to fall back to the page allocator.
+ */
+struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages(gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages_node(nid, gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+/*
+ * __free_kmem_pages and free_kmem_pages will free pages allocated with
+ * alloc_kmem_pages.
*/
-void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+void __free_kmem_pages(struct page *page, unsigned int order)
{
memcg_kmem_uncharge_pages(page, order);
__free_pages(page, order);
}
-void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+void free_kmem_pages(unsigned long addr, unsigned int order)
{
if (addr != 0) {
VM_BUG_ON(!virt_addr_valid((void *)addr));
- __free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+ __free_kmem_pages(virt_to_page((void *)addr), order);
}
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6673597ac967..cab4c49b3e8c 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -573,6 +573,18 @@ void __init create_kmalloc_caches(unsigned long flags)
}
#endif /* !CONFIG_SLOB */
+void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
+{
+ void *ret;
+ struct page *page;
+
+ flags |= __GFP_COMP;
+ page = alloc_kmem_pages(flags, order);
+ ret = page ? page_address(page) : NULL;
+ kmemleak_alloc(ret, size, 1, flags);
+ return ret;
+}
+
#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
diff --git a/mm/slub.c b/mm/slub.c
index b203cfceff95..fa7a1817835e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3335,8 +3335,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
struct page *page;
void *ptr = NULL;
- flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
- page = alloc_pages_node(node, flags, get_order(size));
+ flags |= __GFP_COMP | __GFP_NOTRACK;
+ page = alloc_kmem_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
@@ -3405,7 +3405,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(x);
- __free_memcg_kmem_pages(page, compound_order(page));
+ __free_kmem_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, _RET_IP_);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 32+ messages in thread* Re: [PATCH -mm v2 2/2] mm: get rid of __GFP_KMEMCG
2014-04-01 7:38 ` Vladimir Davydov
@ 2014-04-02 0:48 ` Greg Thelen
-1 siblings, 0 replies; 32+ messages in thread
From: Greg Thelen @ 2014-04-02 0:48 UTC (permalink / raw)
To: Vladimir Davydov
Cc: akpm, hannes, mhocko, glommer, linux-kernel, linux-mm, devel
On Tue, Apr 01 2014, Vladimir Davydov <vdavydov@parallels.com> wrote:
> Currently to allocate a page that should be charged to kmemcg (e.g.
> threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
> allocated is then to be freed by free_memcg_kmem_pages. Apart from
> looking asymmetrical, this also requires intrusion to the general
> allocation path. So let's introduce separate functions that will
> alloc/free pages charged to kmemcg.
>
> The new functions are called alloc_kmem_pages and free_kmem_pages. They
> should be used when the caller actually would like to use kmalloc, but
> has to fall back to the page allocator for the allocation is large. They
> only differ from alloc_pages and free_pages in that besides allocating
> or freeing pages they also charge them to the kmem resource counter of
> the current memory cgroup.
>
> Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
> ---
> include/linux/gfp.h | 10 ++++---
> include/linux/memcontrol.h | 2 +-
> include/linux/slab.h | 11 --------
> include/linux/thread_info.h | 2 --
> include/trace/events/gfpflags.h | 1 -
> kernel/fork.c | 6 ++---
> mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
> mm/slab_common.c | 12 +++++++++
> mm/slub.c | 6 ++---
> 9 files changed, 60 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 39b81dc7d01a..d382db71e300 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -31,7 +31,6 @@ struct vm_area_struct;
> #define ___GFP_HARDWALL 0x20000u
> #define ___GFP_THISNODE 0x40000u
> #define ___GFP_RECLAIMABLE 0x80000u
> -#define ___GFP_KMEMCG 0x100000u
> #define ___GFP_NOTRACK 0x200000u
> #define ___GFP_NO_KSWAPD 0x400000u
> #define ___GFP_OTHER_NODE 0x800000u
> @@ -91,7 +90,6 @@ struct vm_area_struct;
>
> #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
> #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
> -#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
> #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
>
> /*
> @@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
> #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
> alloc_pages_vma(gfp_mask, 0, vma, addr, node)
>
> +extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
> +extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
> + unsigned int order);
> +
> extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
> extern unsigned long get_zeroed_page(gfp_t gfp_mask);
>
> @@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
> extern void free_hot_cold_page(struct page *page, int cold);
> extern void free_hot_cold_page_list(struct list_head *list, int cold);
>
> -extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
> -extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
> +extern void __free_kmem_pages(struct page *page, unsigned int order);
> +extern void free_kmem_pages(unsigned long addr, unsigned int order);
>
> #define __free_page(page) __free_pages((page), 0)
> #define free_page(addr) free_pages((addr), 0)
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 29068dd26c3d..13acdb5259f5 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
> * res_counter_charge_nofail, but we hope those allocations are rare,
> * and won't be worth the trouble.
> */
> - if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
> + if (gfp & __GFP_NOFAIL)
> return true;
> if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
> return true;
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 3dd389aa91c7..6d6959292e00 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -358,17 +358,6 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
> #include <linux/slub_def.h>
> #endif
>
> -static __always_inline void *
> -kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> -{
> - void *ret;
> -
> - flags |= (__GFP_COMP | __GFP_KMEMCG);
> - ret = (void *) __get_free_pages(flags, order);
> - kmemleak_alloc(ret, size, 1, flags);
> - return ret;
> -}
> -
Removing this from the header file breaks builds without
CONFIG_TRACING.
Example:
% make allnoconfig && make -j4 mm/
[...]
include/linux/slab.h: In function a??kmalloc_order_tracea??:
include/linux/slab.h:367:2: error: implicit declaration of function a??kmalloc_ordera?? [-Werror=implicit-function-declaration]
> #ifdef CONFIG_TRACING
> extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
> #else
> diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
> index fddbe2023a5d..1807bb194816 100644
> --- a/include/linux/thread_info.h
> +++ b/include/linux/thread_info.h
> @@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
> # define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
> #endif
>
> -#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
> -
> /*
> * flag set/clear/test wrappers
> * - pass TIF_xxxx constants to these functions
> diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
> index 1eddbf1557f2..d6fd8e5b14b7 100644
> --- a/include/trace/events/gfpflags.h
> +++ b/include/trace/events/gfpflags.h
> @@ -34,7 +34,6 @@
> {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
> {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
> {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
> - {(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \
> {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
> {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
> {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
> diff --git a/kernel/fork.c b/kernel/fork.c
> index f4b09bc15f3a..a1632c878037 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
> static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
> int node)
> {
> - struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
> - THREAD_SIZE_ORDER);
> + struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
> + THREAD_SIZE_ORDER);
>
> return page ? page_address(page) : NULL;
> }
>
> static inline void free_thread_info(struct thread_info *ti)
> {
> - free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
> + free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
> }
> # else
> static struct kmem_cache *thread_info_cache;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0327f9d5a8c0..41378986a1e6 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2723,7 +2723,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
> int migratetype = allocflags_to_migratetype(gfp_mask);
> unsigned int cpuset_mems_cookie;
> int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
> - struct mem_cgroup *memcg = NULL;
>
> gfp_mask &= gfp_allowed_mask;
>
> @@ -2742,13 +2741,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
> if (unlikely(!zonelist->_zonerefs->zone))
> return NULL;
>
> - /*
> - * Will only have any effect when __GFP_KMEMCG is set. This is
> - * verified in the (always inline) callee
> - */
> - if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
> - return NULL;
> -
> retry_cpuset:
> cpuset_mems_cookie = read_mems_allowed_begin();
>
> @@ -2810,8 +2802,6 @@ out:
> if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
> goto retry_cpuset;
>
> - memcg_kmem_commit_charge(page, memcg, order);
> -
> if (page)
> set_page_owner(page, order, gfp_mask);
>
> @@ -2868,27 +2858,51 @@ void free_pages(unsigned long addr, unsigned int order)
> EXPORT_SYMBOL(free_pages);
>
> /*
> - * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
> - * pages allocated with __GFP_KMEMCG.
> + * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
> + * of the current memory cgroup.
> *
> - * Those pages are accounted to a particular memcg, embedded in the
> - * corresponding page_cgroup. To avoid adding a hit in the allocator to search
> - * for that information only to find out that it is NULL for users who have no
> - * interest in that whatsoever, we provide these functions.
> - *
> - * The caller knows better which flags it relies on.
> + * It should be used when the caller would like to use kmalloc, but since the
> + * allocation is large, it has to fall back to the page allocator.
> + */
> +struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
> +{
> + struct page *page;
> + struct mem_cgroup *memcg = NULL;
> +
> + if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
> + return NULL;
> + page = alloc_pages(gfp_mask, order);
> + memcg_kmem_commit_charge(page, memcg, order);
> + return page;
> +}
> +
> +struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
> +{
> + struct page *page;
> + struct mem_cgroup *memcg = NULL;
> +
> + if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
> + return NULL;
> + page = alloc_pages_node(nid, gfp_mask, order);
> + memcg_kmem_commit_charge(page, memcg, order);
> + return page;
> +}
> +
> +/*
> + * __free_kmem_pages and free_kmem_pages will free pages allocated with
> + * alloc_kmem_pages.
> */
> -void __free_memcg_kmem_pages(struct page *page, unsigned int order)
> +void __free_kmem_pages(struct page *page, unsigned int order)
> {
> memcg_kmem_uncharge_pages(page, order);
> __free_pages(page, order);
> }
>
> -void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
> +void free_kmem_pages(unsigned long addr, unsigned int order)
> {
> if (addr != 0) {
> VM_BUG_ON(!virt_addr_valid((void *)addr));
> - __free_memcg_kmem_pages(virt_to_page((void *)addr), order);
> + __free_kmem_pages(virt_to_page((void *)addr), order);
> }
> }
>
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 6673597ac967..cab4c49b3e8c 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -573,6 +573,18 @@ void __init create_kmalloc_caches(unsigned long flags)
> }
> #endif /* !CONFIG_SLOB */
>
> +void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> +{
> + void *ret;
> + struct page *page;
> +
> + flags |= __GFP_COMP;
> + page = alloc_kmem_pages(flags, order);
> + ret = page ? page_address(page) : NULL;
> + kmemleak_alloc(ret, size, 1, flags);
> + return ret;
> +}
> +
> #ifdef CONFIG_TRACING
> void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
> {
> diff --git a/mm/slub.c b/mm/slub.c
> index b203cfceff95..fa7a1817835e 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3335,8 +3335,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
> struct page *page;
> void *ptr = NULL;
>
> - flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
> - page = alloc_pages_node(node, flags, get_order(size));
> + flags |= __GFP_COMP | __GFP_NOTRACK;
> + page = alloc_kmem_pages_node(node, flags, get_order(size));
> if (page)
> ptr = page_address(page);
>
> @@ -3405,7 +3405,7 @@ void kfree(const void *x)
> if (unlikely(!PageSlab(page))) {
> BUG_ON(!PageCompound(page));
> kfree_hook(x);
> - __free_memcg_kmem_pages(page, compound_order(page));
> + __free_kmem_pages(page, compound_order(page));
> return;
> }
> slab_free(page->slab_cache, page, object, _RET_IP_);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 32+ messages in thread* Re: [PATCH -mm v2 2/2] mm: get rid of __GFP_KMEMCG
@ 2014-04-02 0:48 ` Greg Thelen
0 siblings, 0 replies; 32+ messages in thread
From: Greg Thelen @ 2014-04-02 0:48 UTC (permalink / raw)
To: Vladimir Davydov
Cc: akpm, hannes, mhocko, glommer, linux-kernel, linux-mm, devel
On Tue, Apr 01 2014, Vladimir Davydov <vdavydov@parallels.com> wrote:
> Currently to allocate a page that should be charged to kmemcg (e.g.
> threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
> allocated is then to be freed by free_memcg_kmem_pages. Apart from
> looking asymmetrical, this also requires intrusion to the general
> allocation path. So let's introduce separate functions that will
> alloc/free pages charged to kmemcg.
>
> The new functions are called alloc_kmem_pages and free_kmem_pages. They
> should be used when the caller actually would like to use kmalloc, but
> has to fall back to the page allocator for the allocation is large. They
> only differ from alloc_pages and free_pages in that besides allocating
> or freeing pages they also charge them to the kmem resource counter of
> the current memory cgroup.
>
> Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
> ---
> include/linux/gfp.h | 10 ++++---
> include/linux/memcontrol.h | 2 +-
> include/linux/slab.h | 11 --------
> include/linux/thread_info.h | 2 --
> include/trace/events/gfpflags.h | 1 -
> kernel/fork.c | 6 ++---
> mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
> mm/slab_common.c | 12 +++++++++
> mm/slub.c | 6 ++---
> 9 files changed, 60 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 39b81dc7d01a..d382db71e300 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -31,7 +31,6 @@ struct vm_area_struct;
> #define ___GFP_HARDWALL 0x20000u
> #define ___GFP_THISNODE 0x40000u
> #define ___GFP_RECLAIMABLE 0x80000u
> -#define ___GFP_KMEMCG 0x100000u
> #define ___GFP_NOTRACK 0x200000u
> #define ___GFP_NO_KSWAPD 0x400000u
> #define ___GFP_OTHER_NODE 0x800000u
> @@ -91,7 +90,6 @@ struct vm_area_struct;
>
> #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
> #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
> -#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
> #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
>
> /*
> @@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
> #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
> alloc_pages_vma(gfp_mask, 0, vma, addr, node)
>
> +extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
> +extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
> + unsigned int order);
> +
> extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
> extern unsigned long get_zeroed_page(gfp_t gfp_mask);
>
> @@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
> extern void free_hot_cold_page(struct page *page, int cold);
> extern void free_hot_cold_page_list(struct list_head *list, int cold);
>
> -extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
> -extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
> +extern void __free_kmem_pages(struct page *page, unsigned int order);
> +extern void free_kmem_pages(unsigned long addr, unsigned int order);
>
> #define __free_page(page) __free_pages((page), 0)
> #define free_page(addr) free_pages((addr), 0)
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 29068dd26c3d..13acdb5259f5 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
> * res_counter_charge_nofail, but we hope those allocations are rare,
> * and won't be worth the trouble.
> */
> - if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
> + if (gfp & __GFP_NOFAIL)
> return true;
> if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
> return true;
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 3dd389aa91c7..6d6959292e00 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -358,17 +358,6 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
> #include <linux/slub_def.h>
> #endif
>
> -static __always_inline void *
> -kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> -{
> - void *ret;
> -
> - flags |= (__GFP_COMP | __GFP_KMEMCG);
> - ret = (void *) __get_free_pages(flags, order);
> - kmemleak_alloc(ret, size, 1, flags);
> - return ret;
> -}
> -
Removing this from the header file breaks builds without
CONFIG_TRACING.
Example:
% make allnoconfig && make -j4 mm/
[...]
include/linux/slab.h: In function ‘kmalloc_order_trace’:
include/linux/slab.h:367:2: error: implicit declaration of function ‘kmalloc_order’ [-Werror=implicit-function-declaration]
> #ifdef CONFIG_TRACING
> extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
> #else
> diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
> index fddbe2023a5d..1807bb194816 100644
> --- a/include/linux/thread_info.h
> +++ b/include/linux/thread_info.h
> @@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
> # define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
> #endif
>
> -#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
> -
> /*
> * flag set/clear/test wrappers
> * - pass TIF_xxxx constants to these functions
> diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
> index 1eddbf1557f2..d6fd8e5b14b7 100644
> --- a/include/trace/events/gfpflags.h
> +++ b/include/trace/events/gfpflags.h
> @@ -34,7 +34,6 @@
> {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
> {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
> {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
> - {(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \
> {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
> {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
> {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
> diff --git a/kernel/fork.c b/kernel/fork.c
> index f4b09bc15f3a..a1632c878037 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
> static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
> int node)
> {
> - struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
> - THREAD_SIZE_ORDER);
> + struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
> + THREAD_SIZE_ORDER);
>
> return page ? page_address(page) : NULL;
> }
>
> static inline void free_thread_info(struct thread_info *ti)
> {
> - free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
> + free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
> }
> # else
> static struct kmem_cache *thread_info_cache;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0327f9d5a8c0..41378986a1e6 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2723,7 +2723,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
> int migratetype = allocflags_to_migratetype(gfp_mask);
> unsigned int cpuset_mems_cookie;
> int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
> - struct mem_cgroup *memcg = NULL;
>
> gfp_mask &= gfp_allowed_mask;
>
> @@ -2742,13 +2741,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
> if (unlikely(!zonelist->_zonerefs->zone))
> return NULL;
>
> - /*
> - * Will only have any effect when __GFP_KMEMCG is set. This is
> - * verified in the (always inline) callee
> - */
> - if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
> - return NULL;
> -
> retry_cpuset:
> cpuset_mems_cookie = read_mems_allowed_begin();
>
> @@ -2810,8 +2802,6 @@ out:
> if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
> goto retry_cpuset;
>
> - memcg_kmem_commit_charge(page, memcg, order);
> -
> if (page)
> set_page_owner(page, order, gfp_mask);
>
> @@ -2868,27 +2858,51 @@ void free_pages(unsigned long addr, unsigned int order)
> EXPORT_SYMBOL(free_pages);
>
> /*
> - * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
> - * pages allocated with __GFP_KMEMCG.
> + * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
> + * of the current memory cgroup.
> *
> - * Those pages are accounted to a particular memcg, embedded in the
> - * corresponding page_cgroup. To avoid adding a hit in the allocator to search
> - * for that information only to find out that it is NULL for users who have no
> - * interest in that whatsoever, we provide these functions.
> - *
> - * The caller knows better which flags it relies on.
> + * It should be used when the caller would like to use kmalloc, but since the
> + * allocation is large, it has to fall back to the page allocator.
> + */
> +struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
> +{
> + struct page *page;
> + struct mem_cgroup *memcg = NULL;
> +
> + if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
> + return NULL;
> + page = alloc_pages(gfp_mask, order);
> + memcg_kmem_commit_charge(page, memcg, order);
> + return page;
> +}
> +
> +struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
> +{
> + struct page *page;
> + struct mem_cgroup *memcg = NULL;
> +
> + if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
> + return NULL;
> + page = alloc_pages_node(nid, gfp_mask, order);
> + memcg_kmem_commit_charge(page, memcg, order);
> + return page;
> +}
> +
> +/*
> + * __free_kmem_pages and free_kmem_pages will free pages allocated with
> + * alloc_kmem_pages.
> */
> -void __free_memcg_kmem_pages(struct page *page, unsigned int order)
> +void __free_kmem_pages(struct page *page, unsigned int order)
> {
> memcg_kmem_uncharge_pages(page, order);
> __free_pages(page, order);
> }
>
> -void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
> +void free_kmem_pages(unsigned long addr, unsigned int order)
> {
> if (addr != 0) {
> VM_BUG_ON(!virt_addr_valid((void *)addr));
> - __free_memcg_kmem_pages(virt_to_page((void *)addr), order);
> + __free_kmem_pages(virt_to_page((void *)addr), order);
> }
> }
>
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 6673597ac967..cab4c49b3e8c 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -573,6 +573,18 @@ void __init create_kmalloc_caches(unsigned long flags)
> }
> #endif /* !CONFIG_SLOB */
>
> +void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> +{
> + void *ret;
> + struct page *page;
> +
> + flags |= __GFP_COMP;
> + page = alloc_kmem_pages(flags, order);
> + ret = page ? page_address(page) : NULL;
> + kmemleak_alloc(ret, size, 1, flags);
> + return ret;
> +}
> +
> #ifdef CONFIG_TRACING
> void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
> {
> diff --git a/mm/slub.c b/mm/slub.c
> index b203cfceff95..fa7a1817835e 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3335,8 +3335,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
> struct page *page;
> void *ptr = NULL;
>
> - flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
> - page = alloc_pages_node(node, flags, get_order(size));
> + flags |= __GFP_COMP | __GFP_NOTRACK;
> + page = alloc_kmem_pages_node(node, flags, get_order(size));
> if (page)
> ptr = page_address(page);
>
> @@ -3405,7 +3405,7 @@ void kfree(const void *x)
> if (unlikely(!PageSlab(page))) {
> BUG_ON(!PageCompound(page));
> kfree_hook(x);
> - __free_memcg_kmem_pages(page, compound_order(page));
> + __free_kmem_pages(page, compound_order(page));
> return;
> }
> slab_free(page->slab_cache, page, object, _RET_IP_);
^ permalink raw reply [flat|nested] 32+ messages in thread* Re: [PATCH -mm v2 2/2] mm: get rid of __GFP_KMEMCG
2014-04-02 0:48 ` Greg Thelen
@ 2014-04-02 6:11 ` Vladimir Davydov
-1 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-02 6:11 UTC (permalink / raw)
To: Greg Thelen; +Cc: akpm, hannes, mhocko, glommer, linux-kernel, linux-mm, devel
On 04/02/2014 04:48 AM, Greg Thelen wrote:
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 3dd389aa91c7..6d6959292e00 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -358,17 +358,6 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
> #include <linux/slub_def.h>
> #endif
>
> -static __always_inline void *
> -kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> -{
> - void *ret;
> -
> - flags |= (__GFP_COMP | __GFP_KMEMCG);
> - ret = (void *) __get_free_pages(flags, order);
> - kmemleak_alloc(ret, size, 1, flags);
> - return ret;
> -}
> -
> Removing this from the header file breaks builds without
> CONFIG_TRACING.
> Example:
> % make allnoconfig && make -j4 mm/
> [...]
> include/linux/slab.h: In function a??kmalloc_order_tracea??:
> include/linux/slab.h:367:2: error: implicit declaration of function a??kmalloc_ordera?? [-Werror=implicit-function-declaration]
Oh, my bad - forgot to add the function declaration :-(
The fixed version is on its way. Thank you for catching this!
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 32+ messages in thread* Re: [PATCH -mm v2 2/2] mm: get rid of __GFP_KMEMCG
@ 2014-04-02 6:11 ` Vladimir Davydov
0 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-02 6:11 UTC (permalink / raw)
To: Greg Thelen; +Cc: akpm, hannes, mhocko, glommer, linux-kernel, linux-mm, devel
On 04/02/2014 04:48 AM, Greg Thelen wrote:
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 3dd389aa91c7..6d6959292e00 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -358,17 +358,6 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
> #include <linux/slub_def.h>
> #endif
>
> -static __always_inline void *
> -kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> -{
> - void *ret;
> -
> - flags |= (__GFP_COMP | __GFP_KMEMCG);
> - ret = (void *) __get_free_pages(flags, order);
> - kmemleak_alloc(ret, size, 1, flags);
> - return ret;
> -}
> -
> Removing this from the header file breaks builds without
> CONFIG_TRACING.
> Example:
> % make allnoconfig && make -j4 mm/
> [...]
> include/linux/slab.h: In function ‘kmalloc_order_trace’:
> include/linux/slab.h:367:2: error: implicit declaration of function ‘kmalloc_order’ [-Werror=implicit-function-declaration]
Oh, my bad - forgot to add the function declaration :-(
The fixed version is on its way. Thank you for catching this!
^ permalink raw reply [flat|nested] 32+ messages in thread
* [PATCH -mm v2.1] mm: get rid of __GFP_KMEMCG
2014-04-01 7:38 ` Vladimir Davydov
@ 2014-04-02 6:16 ` Vladimir Davydov
-1 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-02 6:16 UTC (permalink / raw)
To: akpm; +Cc: hannes, mhocko, glommer, gthelen, linux-kernel, linux-mm, devel
Currently to allocate a page that should be charged to kmemcg (e.g.
threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
allocated is then to be freed by free_memcg_kmem_pages. Apart from
looking asymmetrical, this also requires intrusion to the general
allocation path. So let's introduce separate functions that will
alloc/free pages charged to kmemcg.
The new functions are called alloc_kmem_pages and free_kmem_pages. They
should be used when the caller actually would like to use kmalloc, but
has to fall back to the page allocator for the allocation is large. They
only differ from alloc_pages and free_pages in that besides allocating
or freeing pages they also charge them to the kmem resource counter of
the current memory cgroup.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
---
Changes in v2.1:
- add missing kmalloc_order forward declaration; lacking it caused
compilation breakage with CONFIG_TRACING=n
include/linux/gfp.h | 10 ++++---
include/linux/memcontrol.h | 2 +-
include/linux/slab.h | 11 +-------
include/linux/thread_info.h | 2 --
include/trace/events/gfpflags.h | 1 -
kernel/fork.c | 6 ++---
mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
mm/slab_common.c | 12 +++++++++
mm/slub.c | 6 ++---
9 files changed, 61 insertions(+), 45 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 39b81dc7d01a..d382db71e300 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -31,7 +31,6 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u
-#define ___GFP_KMEMCG 0x100000u
#define ___GFP_NOTRACK 0x200000u
#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
@@ -91,7 +90,6 @@ struct vm_area_struct;
#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
-#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
/*
@@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
+extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
+ unsigned int order);
+
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
@@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);
-extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
-extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+extern void __free_kmem_pages(struct page *page, unsigned int order);
+extern void free_kmem_pages(unsigned long addr, unsigned int order);
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 29068dd26c3d..13acdb5259f5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
* res_counter_charge_nofail, but we hope those allocations are rare,
* and won't be worth the trouble.
*/
- if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+ if (gfp & __GFP_NOFAIL)
return true;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return true;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3dd389aa91c7..eaaf2f69c54d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -358,16 +358,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
#include <linux/slub_def.h>
#endif
-static __always_inline void *
-kmalloc_order(size_t size, gfp_t flags, unsigned int order)
-{
- void *ret;
-
- flags |= (__GFP_COMP | __GFP_KMEMCG);
- ret = (void *) __get_free_pages(flags, order);
- kmemleak_alloc(ret, size, 1, flags);
- return ret;
-}
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
#ifdef CONFIG_TRACING
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index fddbe2023a5d..1807bb194816 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
#endif
-#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
-
/*
* flag set/clear/test wrappers
* - pass TIF_xxxx constants to these functions
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 1eddbf1557f2..d6fd8e5b14b7 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,7 +34,6 @@
{(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
{(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
{(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
- {(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \
{(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
{(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
{(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
diff --git a/kernel/fork.c b/kernel/fork.c
index f4b09bc15f3a..a1632c878037 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node)
{
- struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
- THREAD_SIZE_ORDER);
+ struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
+ THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
}
static inline void free_thread_info(struct thread_info *ti)
{
- free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+ free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_info_cache;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0327f9d5a8c0..41378986a1e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2723,7 +2723,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
int migratetype = allocflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
- struct mem_cgroup *memcg = NULL;
gfp_mask &= gfp_allowed_mask;
@@ -2742,13 +2741,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (unlikely(!zonelist->_zonerefs->zone))
return NULL;
- /*
- * Will only have any effect when __GFP_KMEMCG is set. This is
- * verified in the (always inline) callee
- */
- if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
- return NULL;
-
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
@@ -2810,8 +2802,6 @@ out:
if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset;
- memcg_kmem_commit_charge(page, memcg, order);
-
if (page)
set_page_owner(page, order, gfp_mask);
@@ -2868,27 +2858,51 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
/*
- * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
- * pages allocated with __GFP_KMEMCG.
+ * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
+ * of the current memory cgroup.
*
- * Those pages are accounted to a particular memcg, embedded in the
- * corresponding page_cgroup. To avoid adding a hit in the allocator to search
- * for that information only to find out that it is NULL for users who have no
- * interest in that whatsoever, we provide these functions.
- *
- * The caller knows better which flags it relies on.
+ * It should be used when the caller would like to use kmalloc, but since the
+ * allocation is large, it has to fall back to the page allocator.
+ */
+struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages(gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages_node(nid, gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+/*
+ * __free_kmem_pages and free_kmem_pages will free pages allocated with
+ * alloc_kmem_pages.
*/
-void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+void __free_kmem_pages(struct page *page, unsigned int order)
{
memcg_kmem_uncharge_pages(page, order);
__free_pages(page, order);
}
-void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+void free_kmem_pages(unsigned long addr, unsigned int order)
{
if (addr != 0) {
VM_BUG_ON(!virt_addr_valid((void *)addr));
- __free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+ __free_kmem_pages(virt_to_page((void *)addr), order);
}
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6673597ac967..cab4c49b3e8c 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -573,6 +573,18 @@ void __init create_kmalloc_caches(unsigned long flags)
}
#endif /* !CONFIG_SLOB */
+void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
+{
+ void *ret;
+ struct page *page;
+
+ flags |= __GFP_COMP;
+ page = alloc_kmem_pages(flags, order);
+ ret = page ? page_address(page) : NULL;
+ kmemleak_alloc(ret, size, 1, flags);
+ return ret;
+}
+
#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
diff --git a/mm/slub.c b/mm/slub.c
index b203cfceff95..fa7a1817835e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3335,8 +3335,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
struct page *page;
void *ptr = NULL;
- flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
- page = alloc_pages_node(node, flags, get_order(size));
+ flags |= __GFP_COMP | __GFP_NOTRACK;
+ page = alloc_kmem_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
@@ -3405,7 +3405,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(x);
- __free_memcg_kmem_pages(page, compound_order(page));
+ __free_kmem_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, _RET_IP_);
--
1.7.10.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 32+ messages in thread* [PATCH -mm v2.1] mm: get rid of __GFP_KMEMCG
@ 2014-04-02 6:16 ` Vladimir Davydov
0 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-02 6:16 UTC (permalink / raw)
To: akpm; +Cc: hannes, mhocko, glommer, gthelen, linux-kernel, linux-mm, devel
Currently to allocate a page that should be charged to kmemcg (e.g.
threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
allocated is then to be freed by free_memcg_kmem_pages. Apart from
looking asymmetrical, this also requires intrusion to the general
allocation path. So let's introduce separate functions that will
alloc/free pages charged to kmemcg.
The new functions are called alloc_kmem_pages and free_kmem_pages. They
should be used when the caller actually would like to use kmalloc, but
has to fall back to the page allocator for the allocation is large. They
only differ from alloc_pages and free_pages in that besides allocating
or freeing pages they also charge them to the kmem resource counter of
the current memory cgroup.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
---
Changes in v2.1:
- add missing kmalloc_order forward declaration; lacking it caused
compilation breakage with CONFIG_TRACING=n
include/linux/gfp.h | 10 ++++---
include/linux/memcontrol.h | 2 +-
include/linux/slab.h | 11 +-------
include/linux/thread_info.h | 2 --
include/trace/events/gfpflags.h | 1 -
kernel/fork.c | 6 ++---
mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
mm/slab_common.c | 12 +++++++++
mm/slub.c | 6 ++---
9 files changed, 61 insertions(+), 45 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 39b81dc7d01a..d382db71e300 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -31,7 +31,6 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u
-#define ___GFP_KMEMCG 0x100000u
#define ___GFP_NOTRACK 0x200000u
#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
@@ -91,7 +90,6 @@ struct vm_area_struct;
#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
-#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
/*
@@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
+extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
+ unsigned int order);
+
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
@@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);
-extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
-extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+extern void __free_kmem_pages(struct page *page, unsigned int order);
+extern void free_kmem_pages(unsigned long addr, unsigned int order);
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 29068dd26c3d..13acdb5259f5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
* res_counter_charge_nofail, but we hope those allocations are rare,
* and won't be worth the trouble.
*/
- if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+ if (gfp & __GFP_NOFAIL)
return true;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return true;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3dd389aa91c7..eaaf2f69c54d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -358,16 +358,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
#include <linux/slub_def.h>
#endif
-static __always_inline void *
-kmalloc_order(size_t size, gfp_t flags, unsigned int order)
-{
- void *ret;
-
- flags |= (__GFP_COMP | __GFP_KMEMCG);
- ret = (void *) __get_free_pages(flags, order);
- kmemleak_alloc(ret, size, 1, flags);
- return ret;
-}
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
#ifdef CONFIG_TRACING
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index fddbe2023a5d..1807bb194816 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
#endif
-#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
-
/*
* flag set/clear/test wrappers
* - pass TIF_xxxx constants to these functions
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 1eddbf1557f2..d6fd8e5b14b7 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,7 +34,6 @@
{(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
{(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
{(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
- {(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \
{(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
{(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
{(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
diff --git a/kernel/fork.c b/kernel/fork.c
index f4b09bc15f3a..a1632c878037 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node)
{
- struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
- THREAD_SIZE_ORDER);
+ struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
+ THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
}
static inline void free_thread_info(struct thread_info *ti)
{
- free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+ free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_info_cache;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0327f9d5a8c0..41378986a1e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2723,7 +2723,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
int migratetype = allocflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
- struct mem_cgroup *memcg = NULL;
gfp_mask &= gfp_allowed_mask;
@@ -2742,13 +2741,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (unlikely(!zonelist->_zonerefs->zone))
return NULL;
- /*
- * Will only have any effect when __GFP_KMEMCG is set. This is
- * verified in the (always inline) callee
- */
- if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
- return NULL;
-
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
@@ -2810,8 +2802,6 @@ out:
if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset;
- memcg_kmem_commit_charge(page, memcg, order);
-
if (page)
set_page_owner(page, order, gfp_mask);
@@ -2868,27 +2858,51 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
/*
- * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
- * pages allocated with __GFP_KMEMCG.
+ * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
+ * of the current memory cgroup.
*
- * Those pages are accounted to a particular memcg, embedded in the
- * corresponding page_cgroup. To avoid adding a hit in the allocator to search
- * for that information only to find out that it is NULL for users who have no
- * interest in that whatsoever, we provide these functions.
- *
- * The caller knows better which flags it relies on.
+ * It should be used when the caller would like to use kmalloc, but since the
+ * allocation is large, it has to fall back to the page allocator.
+ */
+struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages(gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages_node(nid, gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+/*
+ * __free_kmem_pages and free_kmem_pages will free pages allocated with
+ * alloc_kmem_pages.
*/
-void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+void __free_kmem_pages(struct page *page, unsigned int order)
{
memcg_kmem_uncharge_pages(page, order);
__free_pages(page, order);
}
-void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+void free_kmem_pages(unsigned long addr, unsigned int order)
{
if (addr != 0) {
VM_BUG_ON(!virt_addr_valid((void *)addr));
- __free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+ __free_kmem_pages(virt_to_page((void *)addr), order);
}
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6673597ac967..cab4c49b3e8c 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -573,6 +573,18 @@ void __init create_kmalloc_caches(unsigned long flags)
}
#endif /* !CONFIG_SLOB */
+void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
+{
+ void *ret;
+ struct page *page;
+
+ flags |= __GFP_COMP;
+ page = alloc_kmem_pages(flags, order);
+ ret = page ? page_address(page) : NULL;
+ kmemleak_alloc(ret, size, 1, flags);
+ return ret;
+}
+
#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
diff --git a/mm/slub.c b/mm/slub.c
index b203cfceff95..fa7a1817835e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3335,8 +3335,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
struct page *page;
void *ptr = NULL;
- flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
- page = alloc_pages_node(node, flags, get_order(size));
+ flags |= __GFP_COMP | __GFP_NOTRACK;
+ page = alloc_kmem_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
@@ -3405,7 +3405,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(x);
- __free_memcg_kmem_pages(page, compound_order(page));
+ __free_kmem_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, _RET_IP_);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 32+ messages in thread* Re: [PATCH -mm v2.1] mm: get rid of __GFP_KMEMCG
2014-04-02 6:16 ` Vladimir Davydov
@ 2014-04-02 21:25 ` Greg Thelen
-1 siblings, 0 replies; 32+ messages in thread
From: Greg Thelen @ 2014-04-02 21:25 UTC (permalink / raw)
To: Vladimir Davydov
Cc: akpm, hannes, mhocko, glommer, linux-kernel, linux-mm, devel
On Tue, Apr 01 2014, Vladimir Davydov <vdavydov@parallels.com> wrote:
> Currently to allocate a page that should be charged to kmemcg (e.g.
> threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
> allocated is then to be freed by free_memcg_kmem_pages. Apart from
> looking asymmetrical, this also requires intrusion to the general
> allocation path. So let's introduce separate functions that will
> alloc/free pages charged to kmemcg.
>
> The new functions are called alloc_kmem_pages and free_kmem_pages. They
> should be used when the caller actually would like to use kmalloc, but
> has to fall back to the page allocator for the allocation is large. They
> only differ from alloc_pages and free_pages in that besides allocating
> or freeing pages they also charge them to the kmem resource counter of
> the current memory cgroup.
>
> Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
One comment nit below, otherwise looks good to me.
Acked-by: Greg Thelen <gthelen@google.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@suse.cz>
> Cc: Glauber Costa <glommer@gmail.com>
> Cc: Christoph Lameter <cl@linux-foundation.org>
> Cc: Pekka Enberg <penberg@kernel.org>
> ---
> Changes in v2.1:
> - add missing kmalloc_order forward declaration; lacking it caused
> compilation breakage with CONFIG_TRACING=n
>
> include/linux/gfp.h | 10 ++++---
> include/linux/memcontrol.h | 2 +-
> include/linux/slab.h | 11 +-------
> include/linux/thread_info.h | 2 --
> include/trace/events/gfpflags.h | 1 -
> kernel/fork.c | 6 ++---
> mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
> mm/slab_common.c | 12 +++++++++
> mm/slub.c | 6 ++---
> 9 files changed, 61 insertions(+), 45 deletions(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 39b81dc7d01a..d382db71e300 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -31,7 +31,6 @@ struct vm_area_struct;
> #define ___GFP_HARDWALL 0x20000u
> #define ___GFP_THISNODE 0x40000u
> #define ___GFP_RECLAIMABLE 0x80000u
> -#define ___GFP_KMEMCG 0x100000u
> #define ___GFP_NOTRACK 0x200000u
> #define ___GFP_NO_KSWAPD 0x400000u
> #define ___GFP_OTHER_NODE 0x800000u
> @@ -91,7 +90,6 @@ struct vm_area_struct;
>
> #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
> #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
> -#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
> #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
>
> /*
> @@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
> #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
> alloc_pages_vma(gfp_mask, 0, vma, addr, node)
>
> +extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
> +extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
> + unsigned int order);
> +
> extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
> extern unsigned long get_zeroed_page(gfp_t gfp_mask);
>
> @@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
> extern void free_hot_cold_page(struct page *page, int cold);
> extern void free_hot_cold_page_list(struct list_head *list, int cold);
>
> -extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
> -extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
> +extern void __free_kmem_pages(struct page *page, unsigned int order);
> +extern void free_kmem_pages(unsigned long addr, unsigned int order);
>
> #define __free_page(page) __free_pages((page), 0)
> #define free_page(addr) free_pages((addr), 0)
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 29068dd26c3d..13acdb5259f5 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
> * res_counter_charge_nofail, but we hope those allocations are rare,
> * and won't be worth the trouble.
> */
Just a few lines higher in first memcg_kmem_newpage_charge() comment,
there is a leftover reference to GFP_KMEMCG which should be removed.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH -mm v2.1] mm: get rid of __GFP_KMEMCG
@ 2014-04-02 21:25 ` Greg Thelen
0 siblings, 0 replies; 32+ messages in thread
From: Greg Thelen @ 2014-04-02 21:25 UTC (permalink / raw)
To: Vladimir Davydov
Cc: akpm, hannes, mhocko, glommer, linux-kernel, linux-mm, devel
On Tue, Apr 01 2014, Vladimir Davydov <vdavydov@parallels.com> wrote:
> Currently to allocate a page that should be charged to kmemcg (e.g.
> threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
> allocated is then to be freed by free_memcg_kmem_pages. Apart from
> looking asymmetrical, this also requires intrusion to the general
> allocation path. So let's introduce separate functions that will
> alloc/free pages charged to kmemcg.
>
> The new functions are called alloc_kmem_pages and free_kmem_pages. They
> should be used when the caller actually would like to use kmalloc, but
> has to fall back to the page allocator for the allocation is large. They
> only differ from alloc_pages and free_pages in that besides allocating
> or freeing pages they also charge them to the kmem resource counter of
> the current memory cgroup.
>
> Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
One comment nit below, otherwise looks good to me.
Acked-by: Greg Thelen <gthelen@google.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@suse.cz>
> Cc: Glauber Costa <glommer@gmail.com>
> Cc: Christoph Lameter <cl@linux-foundation.org>
> Cc: Pekka Enberg <penberg@kernel.org>
> ---
> Changes in v2.1:
> - add missing kmalloc_order forward declaration; lacking it caused
> compilation breakage with CONFIG_TRACING=n
>
> include/linux/gfp.h | 10 ++++---
> include/linux/memcontrol.h | 2 +-
> include/linux/slab.h | 11 +-------
> include/linux/thread_info.h | 2 --
> include/trace/events/gfpflags.h | 1 -
> kernel/fork.c | 6 ++---
> mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
> mm/slab_common.c | 12 +++++++++
> mm/slub.c | 6 ++---
> 9 files changed, 61 insertions(+), 45 deletions(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 39b81dc7d01a..d382db71e300 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -31,7 +31,6 @@ struct vm_area_struct;
> #define ___GFP_HARDWALL 0x20000u
> #define ___GFP_THISNODE 0x40000u
> #define ___GFP_RECLAIMABLE 0x80000u
> -#define ___GFP_KMEMCG 0x100000u
> #define ___GFP_NOTRACK 0x200000u
> #define ___GFP_NO_KSWAPD 0x400000u
> #define ___GFP_OTHER_NODE 0x800000u
> @@ -91,7 +90,6 @@ struct vm_area_struct;
>
> #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
> #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
> -#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
> #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
>
> /*
> @@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
> #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
> alloc_pages_vma(gfp_mask, 0, vma, addr, node)
>
> +extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
> +extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
> + unsigned int order);
> +
> extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
> extern unsigned long get_zeroed_page(gfp_t gfp_mask);
>
> @@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
> extern void free_hot_cold_page(struct page *page, int cold);
> extern void free_hot_cold_page_list(struct list_head *list, int cold);
>
> -extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
> -extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
> +extern void __free_kmem_pages(struct page *page, unsigned int order);
> +extern void free_kmem_pages(unsigned long addr, unsigned int order);
>
> #define __free_page(page) __free_pages((page), 0)
> #define free_page(addr) free_pages((addr), 0)
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 29068dd26c3d..13acdb5259f5 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
> * res_counter_charge_nofail, but we hope those allocations are rare,
> * and won't be worth the trouble.
> */
Just a few lines higher in first memcg_kmem_newpage_charge() comment,
there is a leftover reference to GFP_KMEMCG which should be removed.
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH -mm v2.1] mm: get rid of __GFP_KMEMCG
2014-04-02 21:25 ` Greg Thelen
@ 2014-04-03 15:04 ` Vladimir Davydov
-1 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-03 15:04 UTC (permalink / raw)
To: Greg Thelen; +Cc: akpm, hannes, mhocko, glommer, linux-kernel, linux-mm, devel
On 04/03/2014 01:25 AM, Greg Thelen wrote:
> On Tue, Apr 01 2014, Vladimir Davydov <vdavydov@parallels.com> wrote:
>
>> Currently to allocate a page that should be charged to kmemcg (e.g.
>> threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
>> allocated is then to be freed by free_memcg_kmem_pages. Apart from
>> looking asymmetrical, this also requires intrusion to the general
>> allocation path. So let's introduce separate functions that will
>> alloc/free pages charged to kmemcg.
>>
>> The new functions are called alloc_kmem_pages and free_kmem_pages. They
>> should be used when the caller actually would like to use kmalloc, but
>> has to fall back to the page allocator for the allocation is large. They
>> only differ from alloc_pages and free_pages in that besides allocating
>> or freeing pages they also charge them to the kmem resource counter of
>> the current memory cgroup.
>>
>> Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
> One comment nit below, otherwise looks good to me.
>
> Acked-by: Greg Thelen <gthelen@google.com>
>
>> Cc: Johannes Weiner <hannes@cmpxchg.org>
>> Cc: Michal Hocko <mhocko@suse.cz>
>> Cc: Glauber Costa <glommer@gmail.com>
>> Cc: Christoph Lameter <cl@linux-foundation.org>
>> Cc: Pekka Enberg <penberg@kernel.org>
>> ---
>> Changes in v2.1:
>> - add missing kmalloc_order forward declaration; lacking it caused
>> compilation breakage with CONFIG_TRACING=n
>>
>> include/linux/gfp.h | 10 ++++---
>> include/linux/memcontrol.h | 2 +-
>> include/linux/slab.h | 11 +-------
>> include/linux/thread_info.h | 2 --
>> include/trace/events/gfpflags.h | 1 -
>> kernel/fork.c | 6 ++---
>> mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
>> mm/slab_common.c | 12 +++++++++
>> mm/slub.c | 6 ++---
>> 9 files changed, 61 insertions(+), 45 deletions(-)
>>
>> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
>> index 39b81dc7d01a..d382db71e300 100644
>> --- a/include/linux/gfp.h
>> +++ b/include/linux/gfp.h
>> @@ -31,7 +31,6 @@ struct vm_area_struct;
>> #define ___GFP_HARDWALL 0x20000u
>> #define ___GFP_THISNODE 0x40000u
>> #define ___GFP_RECLAIMABLE 0x80000u
>> -#define ___GFP_KMEMCG 0x100000u
>> #define ___GFP_NOTRACK 0x200000u
>> #define ___GFP_NO_KSWAPD 0x400000u
>> #define ___GFP_OTHER_NODE 0x800000u
>> @@ -91,7 +90,6 @@ struct vm_area_struct;
>>
>> #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
>> #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
>> -#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
>> #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
>>
>> /*
>> @@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
>> #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
>> alloc_pages_vma(gfp_mask, 0, vma, addr, node)
>>
>> +extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
>> +extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
>> + unsigned int order);
>> +
>> extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
>> extern unsigned long get_zeroed_page(gfp_t gfp_mask);
>>
>> @@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
>> extern void free_hot_cold_page(struct page *page, int cold);
>> extern void free_hot_cold_page_list(struct list_head *list, int cold);
>>
>> -extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
>> -extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
>> +extern void __free_kmem_pages(struct page *page, unsigned int order);
>> +extern void free_kmem_pages(unsigned long addr, unsigned int order);
>>
>> #define __free_page(page) __free_pages((page), 0)
>> #define free_page(addr) free_pages((addr), 0)
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index 29068dd26c3d..13acdb5259f5 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
>> * res_counter_charge_nofail, but we hope those allocations are rare,
>> * and won't be worth the trouble.
>> */
> Just a few lines higher in first memcg_kmem_newpage_charge() comment,
> there is a leftover reference to GFP_KMEMCG which should be removed.
Good catch, will resend.
Thank you for the review!
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH -mm v2.1] mm: get rid of __GFP_KMEMCG
@ 2014-04-03 15:04 ` Vladimir Davydov
0 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-03 15:04 UTC (permalink / raw)
To: Greg Thelen; +Cc: akpm, hannes, mhocko, glommer, linux-kernel, linux-mm, devel
On 04/03/2014 01:25 AM, Greg Thelen wrote:
> On Tue, Apr 01 2014, Vladimir Davydov <vdavydov@parallels.com> wrote:
>
>> Currently to allocate a page that should be charged to kmemcg (e.g.
>> threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
>> allocated is then to be freed by free_memcg_kmem_pages. Apart from
>> looking asymmetrical, this also requires intrusion to the general
>> allocation path. So let's introduce separate functions that will
>> alloc/free pages charged to kmemcg.
>>
>> The new functions are called alloc_kmem_pages and free_kmem_pages. They
>> should be used when the caller actually would like to use kmalloc, but
>> has to fall back to the page allocator for the allocation is large. They
>> only differ from alloc_pages and free_pages in that besides allocating
>> or freeing pages they also charge them to the kmem resource counter of
>> the current memory cgroup.
>>
>> Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
> One comment nit below, otherwise looks good to me.
>
> Acked-by: Greg Thelen <gthelen@google.com>
>
>> Cc: Johannes Weiner <hannes@cmpxchg.org>
>> Cc: Michal Hocko <mhocko@suse.cz>
>> Cc: Glauber Costa <glommer@gmail.com>
>> Cc: Christoph Lameter <cl@linux-foundation.org>
>> Cc: Pekka Enberg <penberg@kernel.org>
>> ---
>> Changes in v2.1:
>> - add missing kmalloc_order forward declaration; lacking it caused
>> compilation breakage with CONFIG_TRACING=n
>>
>> include/linux/gfp.h | 10 ++++---
>> include/linux/memcontrol.h | 2 +-
>> include/linux/slab.h | 11 +-------
>> include/linux/thread_info.h | 2 --
>> include/trace/events/gfpflags.h | 1 -
>> kernel/fork.c | 6 ++---
>> mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
>> mm/slab_common.c | 12 +++++++++
>> mm/slub.c | 6 ++---
>> 9 files changed, 61 insertions(+), 45 deletions(-)
>>
>> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
>> index 39b81dc7d01a..d382db71e300 100644
>> --- a/include/linux/gfp.h
>> +++ b/include/linux/gfp.h
>> @@ -31,7 +31,6 @@ struct vm_area_struct;
>> #define ___GFP_HARDWALL 0x20000u
>> #define ___GFP_THISNODE 0x40000u
>> #define ___GFP_RECLAIMABLE 0x80000u
>> -#define ___GFP_KMEMCG 0x100000u
>> #define ___GFP_NOTRACK 0x200000u
>> #define ___GFP_NO_KSWAPD 0x400000u
>> #define ___GFP_OTHER_NODE 0x800000u
>> @@ -91,7 +90,6 @@ struct vm_area_struct;
>>
>> #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
>> #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
>> -#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
>> #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
>>
>> /*
>> @@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
>> #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
>> alloc_pages_vma(gfp_mask, 0, vma, addr, node)
>>
>> +extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
>> +extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
>> + unsigned int order);
>> +
>> extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
>> extern unsigned long get_zeroed_page(gfp_t gfp_mask);
>>
>> @@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
>> extern void free_hot_cold_page(struct page *page, int cold);
>> extern void free_hot_cold_page_list(struct list_head *list, int cold);
>>
>> -extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
>> -extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
>> +extern void __free_kmem_pages(struct page *page, unsigned int order);
>> +extern void free_kmem_pages(unsigned long addr, unsigned int order);
>>
>> #define __free_page(page) __free_pages((page), 0)
>> #define free_page(addr) free_pages((addr), 0)
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index 29068dd26c3d..13acdb5259f5 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
>> * res_counter_charge_nofail, but we hope those allocations are rare,
>> * and won't be worth the trouble.
>> */
> Just a few lines higher in first memcg_kmem_newpage_charge() comment,
> there is a leftover reference to GFP_KMEMCG which should be removed.
Good catch, will resend.
Thank you for the review!
^ permalink raw reply [flat|nested] 32+ messages in thread
* [PATCH -mm v2.2] mm: get rid of __GFP_KMEMCG
2014-04-02 6:16 ` Vladimir Davydov
@ 2014-04-03 15:05 ` Vladimir Davydov
-1 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-03 15:05 UTC (permalink / raw)
To: akpm
Cc: linux-kernel, linux-mm, devel, Greg Thelen, Johannes Weiner,
Michal Hocko, Glauber Costa, Christoph Lameter, Pekka Enberg
Currently to allocate a page that should be charged to kmemcg (e.g.
threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
allocated is then to be freed by free_memcg_kmem_pages. Apart from
looking asymmetrical, this also requires intrusion to the general
allocation path. So let's introduce separate functions that will
alloc/free pages charged to kmemcg.
The new functions are called alloc_kmem_pages and free_kmem_pages. They
should be used when the caller actually would like to use kmalloc, but
has to fall back to the page allocator for the allocation is large. They
only differ from alloc_pages and free_pages in that besides allocating
or freeing pages they also charge them to the kmem resource counter of
the current memory cgroup.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
---
Changes in v2.2:
- remove mentioning of GFP_KMEMCG in the comment to
__memcg_kmem_newpage_charge
Changes in v2.1:
- add missing kmalloc_order forward declaration; lacking it caused
compilation breakage with CONFIG_TRACING=n
include/linux/gfp.h | 10 ++++---
include/linux/memcontrol.h | 2 +-
include/linux/slab.h | 11 +-------
include/linux/thread_info.h | 2 --
include/trace/events/gfpflags.h | 1 -
kernel/fork.c | 6 ++---
mm/memcontrol.c | 11 ++++----
mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
mm/slab_common.c | 12 +++++++++
mm/slub.c | 6 ++---
10 files changed, 67 insertions(+), 50 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 39b81dc7d01a..d382db71e300 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -31,7 +31,6 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u
-#define ___GFP_KMEMCG 0x100000u
#define ___GFP_NOTRACK 0x200000u
#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
@@ -91,7 +90,6 @@ struct vm_area_struct;
#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
-#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
/*
@@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
+extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
+ unsigned int order);
+
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
@@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);
-extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
-extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+extern void __free_kmem_pages(struct page *page, unsigned int order);
+extern void free_kmem_pages(unsigned long addr, unsigned int order);
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 29068dd26c3d..13acdb5259f5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
* res_counter_charge_nofail, but we hope those allocations are rare,
* and won't be worth the trouble.
*/
- if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+ if (gfp & __GFP_NOFAIL)
return true;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return true;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3dd389aa91c7..eaaf2f69c54d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -358,16 +358,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
#include <linux/slub_def.h>
#endif
-static __always_inline void *
-kmalloc_order(size_t size, gfp_t flags, unsigned int order)
-{
- void *ret;
-
- flags |= (__GFP_COMP | __GFP_KMEMCG);
- ret = (void *) __get_free_pages(flags, order);
- kmemleak_alloc(ret, size, 1, flags);
- return ret;
-}
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
#ifdef CONFIG_TRACING
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index fddbe2023a5d..1807bb194816 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
#endif
-#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
-
/*
* flag set/clear/test wrappers
* - pass TIF_xxxx constants to these functions
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 1eddbf1557f2..d6fd8e5b14b7 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,7 +34,6 @@
{(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
{(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
{(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
- {(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \
{(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
{(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
{(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
diff --git a/kernel/fork.c b/kernel/fork.c
index f4b09bc15f3a..a1632c878037 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node)
{
- struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
- THREAD_SIZE_ORDER);
+ struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
+ THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
}
static inline void free_thread_info(struct thread_info *ti)
{
- free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+ free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_info_cache;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 71e1e11ab6a2..ba30b26fc483 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3531,11 +3531,12 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
/*
* Disabling accounting is only relevant for some specific memcg
* internal allocations. Therefore we would initially not have such
- * check here, since direct calls to the page allocator that are marked
- * with GFP_KMEMCG only happen outside memcg core. We are mostly
- * concerned with cache allocations, and by having this test at
- * memcg_kmem_get_cache, we are already able to relay the allocation to
- * the root cache and bypass the memcg cache altogether.
+ * check here, since direct calls to the page allocator that are
+ * accounted to kmemcg (alloc_kmem_pages and friends) only happen
+ * outside memcg core. We are mostly concerned with cache allocations,
+ * and by having this test at memcg_kmem_get_cache, we are already able
+ * to relay the allocation to the root cache and bypass the memcg cache
+ * altogether.
*
* There is one exception, though: the SLUB allocator does not create
* large order caches, but rather service large kmallocs directly from
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0327f9d5a8c0..41378986a1e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2723,7 +2723,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
int migratetype = allocflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
- struct mem_cgroup *memcg = NULL;
gfp_mask &= gfp_allowed_mask;
@@ -2742,13 +2741,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (unlikely(!zonelist->_zonerefs->zone))
return NULL;
- /*
- * Will only have any effect when __GFP_KMEMCG is set. This is
- * verified in the (always inline) callee
- */
- if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
- return NULL;
-
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
@@ -2810,8 +2802,6 @@ out:
if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset;
- memcg_kmem_commit_charge(page, memcg, order);
-
if (page)
set_page_owner(page, order, gfp_mask);
@@ -2868,27 +2858,51 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
/*
- * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
- * pages allocated with __GFP_KMEMCG.
+ * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
+ * of the current memory cgroup.
*
- * Those pages are accounted to a particular memcg, embedded in the
- * corresponding page_cgroup. To avoid adding a hit in the allocator to search
- * for that information only to find out that it is NULL for users who have no
- * interest in that whatsoever, we provide these functions.
- *
- * The caller knows better which flags it relies on.
+ * It should be used when the caller would like to use kmalloc, but since the
+ * allocation is large, it has to fall back to the page allocator.
+ */
+struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages(gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages_node(nid, gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+/*
+ * __free_kmem_pages and free_kmem_pages will free pages allocated with
+ * alloc_kmem_pages.
*/
-void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+void __free_kmem_pages(struct page *page, unsigned int order)
{
memcg_kmem_uncharge_pages(page, order);
__free_pages(page, order);
}
-void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+void free_kmem_pages(unsigned long addr, unsigned int order)
{
if (addr != 0) {
VM_BUG_ON(!virt_addr_valid((void *)addr));
- __free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+ __free_kmem_pages(virt_to_page((void *)addr), order);
}
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6673597ac967..cab4c49b3e8c 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -573,6 +573,18 @@ void __init create_kmalloc_caches(unsigned long flags)
}
#endif /* !CONFIG_SLOB */
+void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
+{
+ void *ret;
+ struct page *page;
+
+ flags |= __GFP_COMP;
+ page = alloc_kmem_pages(flags, order);
+ ret = page ? page_address(page) : NULL;
+ kmemleak_alloc(ret, size, 1, flags);
+ return ret;
+}
+
#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
diff --git a/mm/slub.c b/mm/slub.c
index b203cfceff95..fa7a1817835e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3335,8 +3335,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
struct page *page;
void *ptr = NULL;
- flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
- page = alloc_pages_node(node, flags, get_order(size));
+ flags |= __GFP_COMP | __GFP_NOTRACK;
+ page = alloc_kmem_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
@@ -3405,7 +3405,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(x);
- __free_memcg_kmem_pages(page, compound_order(page));
+ __free_kmem_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, _RET_IP_);
--
1.7.10.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 32+ messages in thread* [PATCH -mm v2.2] mm: get rid of __GFP_KMEMCG
@ 2014-04-03 15:05 ` Vladimir Davydov
0 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-03 15:05 UTC (permalink / raw)
To: akpm
Cc: linux-kernel, linux-mm, devel, Greg Thelen, Johannes Weiner,
Michal Hocko, Glauber Costa, Christoph Lameter, Pekka Enberg
Currently to allocate a page that should be charged to kmemcg (e.g.
threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
allocated is then to be freed by free_memcg_kmem_pages. Apart from
looking asymmetrical, this also requires intrusion to the general
allocation path. So let's introduce separate functions that will
alloc/free pages charged to kmemcg.
The new functions are called alloc_kmem_pages and free_kmem_pages. They
should be used when the caller actually would like to use kmalloc, but
has to fall back to the page allocator for the allocation is large. They
only differ from alloc_pages and free_pages in that besides allocating
or freeing pages they also charge them to the kmem resource counter of
the current memory cgroup.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
---
Changes in v2.2:
- remove mentioning of GFP_KMEMCG in the comment to
__memcg_kmem_newpage_charge
Changes in v2.1:
- add missing kmalloc_order forward declaration; lacking it caused
compilation breakage with CONFIG_TRACING=n
include/linux/gfp.h | 10 ++++---
include/linux/memcontrol.h | 2 +-
include/linux/slab.h | 11 +-------
include/linux/thread_info.h | 2 --
include/trace/events/gfpflags.h | 1 -
kernel/fork.c | 6 ++---
mm/memcontrol.c | 11 ++++----
mm/page_alloc.c | 56 ++++++++++++++++++++++++---------------
mm/slab_common.c | 12 +++++++++
mm/slub.c | 6 ++---
10 files changed, 67 insertions(+), 50 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 39b81dc7d01a..d382db71e300 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -31,7 +31,6 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u
-#define ___GFP_KMEMCG 0x100000u
#define ___GFP_NOTRACK 0x200000u
#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
@@ -91,7 +90,6 @@ struct vm_area_struct;
#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
-#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
/*
@@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
+extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
+ unsigned int order);
+
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
@@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);
-extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
-extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+extern void __free_kmem_pages(struct page *page, unsigned int order);
+extern void free_kmem_pages(unsigned long addr, unsigned int order);
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 29068dd26c3d..13acdb5259f5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -543,7 +543,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
* res_counter_charge_nofail, but we hope those allocations are rare,
* and won't be worth the trouble.
*/
- if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+ if (gfp & __GFP_NOFAIL)
return true;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return true;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3dd389aa91c7..eaaf2f69c54d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -358,16 +358,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
#include <linux/slub_def.h>
#endif
-static __always_inline void *
-kmalloc_order(size_t size, gfp_t flags, unsigned int order)
-{
- void *ret;
-
- flags |= (__GFP_COMP | __GFP_KMEMCG);
- ret = (void *) __get_free_pages(flags, order);
- kmemleak_alloc(ret, size, 1, flags);
- return ret;
-}
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
#ifdef CONFIG_TRACING
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index fddbe2023a5d..1807bb194816 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
#endif
-#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
-
/*
* flag set/clear/test wrappers
* - pass TIF_xxxx constants to these functions
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 1eddbf1557f2..d6fd8e5b14b7 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,7 +34,6 @@
{(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
{(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
{(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
- {(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \
{(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
{(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
{(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
diff --git a/kernel/fork.c b/kernel/fork.c
index f4b09bc15f3a..a1632c878037 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node)
{
- struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
- THREAD_SIZE_ORDER);
+ struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
+ THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
}
static inline void free_thread_info(struct thread_info *ti)
{
- free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+ free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_info_cache;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 71e1e11ab6a2..ba30b26fc483 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3531,11 +3531,12 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
/*
* Disabling accounting is only relevant for some specific memcg
* internal allocations. Therefore we would initially not have such
- * check here, since direct calls to the page allocator that are marked
- * with GFP_KMEMCG only happen outside memcg core. We are mostly
- * concerned with cache allocations, and by having this test at
- * memcg_kmem_get_cache, we are already able to relay the allocation to
- * the root cache and bypass the memcg cache altogether.
+ * check here, since direct calls to the page allocator that are
+ * accounted to kmemcg (alloc_kmem_pages and friends) only happen
+ * outside memcg core. We are mostly concerned with cache allocations,
+ * and by having this test at memcg_kmem_get_cache, we are already able
+ * to relay the allocation to the root cache and bypass the memcg cache
+ * altogether.
*
* There is one exception, though: the SLUB allocator does not create
* large order caches, but rather service large kmallocs directly from
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0327f9d5a8c0..41378986a1e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2723,7 +2723,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
int migratetype = allocflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
- struct mem_cgroup *memcg = NULL;
gfp_mask &= gfp_allowed_mask;
@@ -2742,13 +2741,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (unlikely(!zonelist->_zonerefs->zone))
return NULL;
- /*
- * Will only have any effect when __GFP_KMEMCG is set. This is
- * verified in the (always inline) callee
- */
- if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
- return NULL;
-
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
@@ -2810,8 +2802,6 @@ out:
if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset;
- memcg_kmem_commit_charge(page, memcg, order);
-
if (page)
set_page_owner(page, order, gfp_mask);
@@ -2868,27 +2858,51 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
/*
- * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
- * pages allocated with __GFP_KMEMCG.
+ * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
+ * of the current memory cgroup.
*
- * Those pages are accounted to a particular memcg, embedded in the
- * corresponding page_cgroup. To avoid adding a hit in the allocator to search
- * for that information only to find out that it is NULL for users who have no
- * interest in that whatsoever, we provide these functions.
- *
- * The caller knows better which flags it relies on.
+ * It should be used when the caller would like to use kmalloc, but since the
+ * allocation is large, it has to fall back to the page allocator.
+ */
+struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages(gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages_node(nid, gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+/*
+ * __free_kmem_pages and free_kmem_pages will free pages allocated with
+ * alloc_kmem_pages.
*/
-void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+void __free_kmem_pages(struct page *page, unsigned int order)
{
memcg_kmem_uncharge_pages(page, order);
__free_pages(page, order);
}
-void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+void free_kmem_pages(unsigned long addr, unsigned int order)
{
if (addr != 0) {
VM_BUG_ON(!virt_addr_valid((void *)addr));
- __free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+ __free_kmem_pages(virt_to_page((void *)addr), order);
}
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6673597ac967..cab4c49b3e8c 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -573,6 +573,18 @@ void __init create_kmalloc_caches(unsigned long flags)
}
#endif /* !CONFIG_SLOB */
+void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
+{
+ void *ret;
+ struct page *page;
+
+ flags |= __GFP_COMP;
+ page = alloc_kmem_pages(flags, order);
+ ret = page ? page_address(page) : NULL;
+ kmemleak_alloc(ret, size, 1, flags);
+ return ret;
+}
+
#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
diff --git a/mm/slub.c b/mm/slub.c
index b203cfceff95..fa7a1817835e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3335,8 +3335,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
struct page *page;
void *ptr = NULL;
- flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
- page = alloc_pages_node(node, flags, get_order(size));
+ flags |= __GFP_COMP | __GFP_NOTRACK;
+ page = alloc_kmem_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
@@ -3405,7 +3405,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(x);
- __free_memcg_kmem_pages(page, compound_order(page));
+ __free_kmem_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, _RET_IP_);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 32+ messages in thread* Re: [PATCH -mm v2.2] mm: get rid of __GFP_KMEMCG
2014-04-03 15:05 ` Vladimir Davydov
@ 2014-04-10 23:38 ` Andrew Morton
-1 siblings, 0 replies; 32+ messages in thread
From: Andrew Morton @ 2014-04-10 23:38 UTC (permalink / raw)
To: Vladimir Davydov
Cc: linux-kernel, linux-mm, devel, Greg Thelen, Johannes Weiner,
Michal Hocko, Glauber Costa, Christoph Lameter, Pekka Enberg
On Thu, 3 Apr 2014 19:05:59 +0400 Vladimir Davydov <vdavydov@parallels.com> wrote:
> Currently to allocate a page that should be charged to kmemcg (e.g.
> threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
> allocated is then to be freed by free_memcg_kmem_pages. Apart from
> looking asymmetrical, this also requires intrusion to the general
> allocation path. So let's introduce separate functions that will
> alloc/free pages charged to kmemcg.
>
> The new functions are called alloc_kmem_pages and free_kmem_pages. They
> should be used when the caller actually would like to use kmalloc, but
> has to fall back to the page allocator for the allocation is large. They
> only differ from alloc_pages and free_pages in that besides allocating
> or freeing pages they also charge them to the kmem resource counter of
> the current memory cgroup.
>
> ...
>
> +void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> +{
> + void *ret;
> + struct page *page;
> +
> + flags |= __GFP_COMP;
> + page = alloc_kmem_pages(flags, order);
> + ret = page ? page_address(page) : NULL;
> + kmemleak_alloc(ret, size, 1, flags);
> + return ret;
> +}
While we're in there it wouldn't hurt to document this: why it exists,
what it does, etc. And why it sets __GFP_COMP.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 32+ messages in thread* Re: [PATCH -mm v2.2] mm: get rid of __GFP_KMEMCG
@ 2014-04-10 23:38 ` Andrew Morton
0 siblings, 0 replies; 32+ messages in thread
From: Andrew Morton @ 2014-04-10 23:38 UTC (permalink / raw)
To: Vladimir Davydov
Cc: linux-kernel, linux-mm, devel, Greg Thelen, Johannes Weiner,
Michal Hocko, Glauber Costa, Christoph Lameter, Pekka Enberg
On Thu, 3 Apr 2014 19:05:59 +0400 Vladimir Davydov <vdavydov@parallels.com> wrote:
> Currently to allocate a page that should be charged to kmemcg (e.g.
> threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page
> allocated is then to be freed by free_memcg_kmem_pages. Apart from
> looking asymmetrical, this also requires intrusion to the general
> allocation path. So let's introduce separate functions that will
> alloc/free pages charged to kmemcg.
>
> The new functions are called alloc_kmem_pages and free_kmem_pages. They
> should be used when the caller actually would like to use kmalloc, but
> has to fall back to the page allocator for the allocation is large. They
> only differ from alloc_pages and free_pages in that besides allocating
> or freeing pages they also charge them to the kmem resource counter of
> the current memory cgroup.
>
> ...
>
> +void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> +{
> + void *ret;
> + struct page *page;
> +
> + flags |= __GFP_COMP;
> + page = alloc_kmem_pages(flags, order);
> + ret = page ? page_address(page) : NULL;
> + kmemleak_alloc(ret, size, 1, flags);
> + return ret;
> +}
While we're in there it wouldn't hurt to document this: why it exists,
what it does, etc. And why it sets __GFP_COMP.
^ permalink raw reply [flat|nested] 32+ messages in thread* [PATCH -mm] slab: document kmalloc_order
2014-04-10 23:38 ` Andrew Morton
@ 2014-04-11 12:52 ` Vladimir Davydov
-1 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-11 12:52 UTC (permalink / raw)
To: akpm
Cc: cl, penberg, gthelen, hannes, mhocko, glommer, linux-kernel,
linux-mm, devel
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
---
mm/slab_common.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index cab4c49b3e8c..3ffd2e76b5d2 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -573,6 +573,11 @@ void __init create_kmalloc_caches(unsigned long flags)
}
#endif /* !CONFIG_SLOB */
+/*
+ * To avoid unnecessary overhead, we pass through large allocation requests
+ * directly to the page allocator. We use __GFP_COMP, because we will need to
+ * know the allocation order to free the pages properly in kfree.
+ */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
void *ret;
--
1.7.10.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 32+ messages in thread* [PATCH -mm] slab: document kmalloc_order
@ 2014-04-11 12:52 ` Vladimir Davydov
0 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-11 12:52 UTC (permalink / raw)
To: akpm
Cc: cl, penberg, gthelen, hannes, mhocko, glommer, linux-kernel,
linux-mm, devel
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
---
mm/slab_common.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index cab4c49b3e8c..3ffd2e76b5d2 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -573,6 +573,11 @@ void __init create_kmalloc_caches(unsigned long flags)
}
#endif /* !CONFIG_SLOB */
+/*
+ * To avoid unnecessary overhead, we pass through large allocation requests
+ * directly to the page allocator. We use __GFP_COMP, because we will need to
+ * know the allocation order to free the pages properly in kfree.
+ */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
void *ret;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 32+ messages in thread* Re: [PATCH -mm] slab: document kmalloc_order
2014-04-11 12:52 ` Vladimir Davydov
@ 2014-04-11 15:57 ` Christoph Lameter
-1 siblings, 0 replies; 32+ messages in thread
From: Christoph Lameter @ 2014-04-11 15:57 UTC (permalink / raw)
To: Vladimir Davydov
Cc: akpm, penberg, gthelen, hannes, mhocko, glommer, linux-kernel,
linux-mm, devel
On Fri, 11 Apr 2014, Vladimir Davydov wrote:
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index cab4c49b3e8c..3ffd2e76b5d2 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -573,6 +573,11 @@ void __init create_kmalloc_caches(unsigned long flags)
> }
> #endif /* !CONFIG_SLOB */
>
> +/*
> + * To avoid unnecessary overhead, we pass through large allocation requests
> + * directly to the page allocator. We use __GFP_COMP, because we will need to
> + * know the allocation order to free the pages properly in kfree.
> + */
> void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> {
> void *ret;
>
??? kmalloc_order is defined in include/linux/slab.h
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 32+ messages in thread* Re: [PATCH -mm] slab: document kmalloc_order
@ 2014-04-11 15:57 ` Christoph Lameter
0 siblings, 0 replies; 32+ messages in thread
From: Christoph Lameter @ 2014-04-11 15:57 UTC (permalink / raw)
To: Vladimir Davydov
Cc: akpm, penberg, gthelen, hannes, mhocko, glommer, linux-kernel,
linux-mm, devel
On Fri, 11 Apr 2014, Vladimir Davydov wrote:
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index cab4c49b3e8c..3ffd2e76b5d2 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -573,6 +573,11 @@ void __init create_kmalloc_caches(unsigned long flags)
> }
> #endif /* !CONFIG_SLOB */
>
> +/*
> + * To avoid unnecessary overhead, we pass through large allocation requests
> + * directly to the page allocator. We use __GFP_COMP, because we will need to
> + * know the allocation order to free the pages properly in kfree.
> + */
> void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> {
> void *ret;
>
??? kmalloc_order is defined in include/linux/slab.h
^ permalink raw reply [flat|nested] 32+ messages in thread* Re: [PATCH -mm] slab: document kmalloc_order
2014-04-11 15:57 ` Christoph Lameter
@ 2014-04-11 17:24 ` Vladimir Davydov
-1 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-11 17:24 UTC (permalink / raw)
To: Christoph Lameter
Cc: akpm, penberg, gthelen, hannes, mhocko, glommer, linux-kernel,
linux-mm, devel
On 04/11/2014 07:57 PM, Christoph Lameter wrote:
> On Fri, 11 Apr 2014, Vladimir Davydov wrote:
>
>> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> index cab4c49b3e8c..3ffd2e76b5d2 100644
>> --- a/mm/slab_common.c
>> +++ b/mm/slab_common.c
>> @@ -573,6 +573,11 @@ void __init create_kmalloc_caches(unsigned long flags)
>> }
>> #endif /* !CONFIG_SLOB */
>>
>> +/*
>> + * To avoid unnecessary overhead, we pass through large allocation requests
>> + * directly to the page allocator. We use __GFP_COMP, because we will need to
>> + * know the allocation order to free the pages properly in kfree.
>> + */
>> void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
>> {
>> void *ret;
>>
> ??? kmalloc_order is defined in include/linux/slab.h
I moved it to slab_common.c in "[PATCH -mm v2.2] mm: get rid of
__GFP_KMEMCG"
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 32+ messages in thread* Re: [PATCH -mm] slab: document kmalloc_order
@ 2014-04-11 17:24 ` Vladimir Davydov
0 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-11 17:24 UTC (permalink / raw)
To: Christoph Lameter
Cc: akpm, penberg, gthelen, hannes, mhocko, glommer, linux-kernel,
linux-mm, devel
On 04/11/2014 07:57 PM, Christoph Lameter wrote:
> On Fri, 11 Apr 2014, Vladimir Davydov wrote:
>
>> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> index cab4c49b3e8c..3ffd2e76b5d2 100644
>> --- a/mm/slab_common.c
>> +++ b/mm/slab_common.c
>> @@ -573,6 +573,11 @@ void __init create_kmalloc_caches(unsigned long flags)
>> }
>> #endif /* !CONFIG_SLOB */
>>
>> +/*
>> + * To avoid unnecessary overhead, we pass through large allocation requests
>> + * directly to the page allocator. We use __GFP_COMP, because we will need to
>> + * know the allocation order to free the pages properly in kfree.
>> + */
>> void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
>> {
>> void *ret;
>>
> ??? kmalloc_order is defined in include/linux/slab.h
I moved it to slab_common.c in "[PATCH -mm v2.2] mm: get rid of
__GFP_KMEMCG"
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH -mm v2.2] mm: get rid of __GFP_KMEMCG
2014-04-03 15:05 ` Vladimir Davydov
@ 2014-04-11 16:07 ` Christoph Lameter
-1 siblings, 0 replies; 32+ messages in thread
From: Christoph Lameter @ 2014-04-11 16:07 UTC (permalink / raw)
To: Vladimir Davydov
Cc: akpm, linux-kernel, linux-mm, devel, Greg Thelen, Johannes Weiner,
Michal Hocko, Glauber Costa, Pekka Enberg
On Thu, 3 Apr 2014, Vladimir Davydov wrote:
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -358,16 +358,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
> #include <linux/slub_def.h>
> #endif
>
> -static __always_inline void *
> -kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> -{
> - void *ret;
> -
> - flags |= (__GFP_COMP | __GFP_KMEMCG);
> - ret = (void *) __get_free_pages(flags, order);
> - kmemleak_alloc(ret, size, 1, flags);
> - return ret;
> -}
> +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
Hmmm... This was intentional inlined to allow inline expansion for calls
to kmalloc with large constants. The inline expansion directly converts
these calls to page allocator calls avoiding slab overhead.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 32+ messages in thread* Re: [PATCH -mm v2.2] mm: get rid of __GFP_KMEMCG
@ 2014-04-11 16:07 ` Christoph Lameter
0 siblings, 0 replies; 32+ messages in thread
From: Christoph Lameter @ 2014-04-11 16:07 UTC (permalink / raw)
To: Vladimir Davydov
Cc: akpm, linux-kernel, linux-mm, devel, Greg Thelen, Johannes Weiner,
Michal Hocko, Glauber Costa, Pekka Enberg
On Thu, 3 Apr 2014, Vladimir Davydov wrote:
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -358,16 +358,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
> #include <linux/slub_def.h>
> #endif
>
> -static __always_inline void *
> -kmalloc_order(size_t size, gfp_t flags, unsigned int order)
> -{
> - void *ret;
> -
> - flags |= (__GFP_COMP | __GFP_KMEMCG);
> - ret = (void *) __get_free_pages(flags, order);
> - kmemleak_alloc(ret, size, 1, flags);
> - return ret;
> -}
> +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
Hmmm... This was intentional inlined to allow inline expansion for calls
to kmalloc with large constants. The inline expansion directly converts
these calls to page allocator calls avoiding slab overhead.
^ permalink raw reply [flat|nested] 32+ messages in thread* Re: [PATCH -mm v2.2] mm: get rid of __GFP_KMEMCG
2014-04-11 16:07 ` Christoph Lameter
@ 2014-04-11 17:33 ` Vladimir Davydov
-1 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-11 17:33 UTC (permalink / raw)
To: Christoph Lameter
Cc: akpm, linux-kernel, linux-mm, devel, Greg Thelen, Johannes Weiner,
Michal Hocko, Glauber Costa, Pekka Enberg
On 04/11/2014 08:07 PM, Christoph Lameter wrote:
> On Thu, 3 Apr 2014, Vladimir Davydov wrote:
>
>> --- a/include/linux/slab.h
>> +++ b/include/linux/slab.h
>> @@ -358,16 +358,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
>> #include <linux/slub_def.h>
>> #endif
>>
>> -static __always_inline void *
>> -kmalloc_order(size_t size, gfp_t flags, unsigned int order)
>> -{
>> - void *ret;
>> -
>> - flags |= (__GFP_COMP | __GFP_KMEMCG);
>> - ret = (void *) __get_free_pages(flags, order);
>> - kmemleak_alloc(ret, size, 1, flags);
>> - return ret;
>> -}
>> +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
>
> Hmmm... This was intentional inlined to allow inline expansion for calls
> to kmalloc with large constants. The inline expansion directly converts
> these calls to page allocator calls avoiding slab overhead.
I moved kmalloc_order() to slab_common.c, because I can't call
alloc_kmem_pages() directly from the header (we don't have
page_address() defined there, and including mm.h to slab.h wouldn't be
good I think), and I don't want to introduce __get_free_kmem_pages().
Sorry that I didn't state this explicitly in the comment to the patch.
However, would it be any better if I introduced __get_free_kmem_pages()
and called it from kmalloc_order(), which could be inlined then? I don't
think so, because I would have to place __get_free_kmem_pages() in
page_alloc.c just like __get_free_pages() (again, because including mm.h
to gfp.h for page_address() isn't an option), and we would get exactly
the same number of function calls.
I admit that this patch adds one extra function call to large kmallocs:
- before: __get_free_pages -> alloc_pages
- after: kmalloc_order -> alloc_kmem_pages -> alloc_pages
but that's not because I move kmalloc_order from the header, but rather
because I introduce alloc_kmem_pages, which is not inline.
What can we do to eliminate it? We could place alloc_kmem_pages()
definition to a header file, but since it needs memcontrol.h for kmemcg
charging functions, that would lead to slab.h depending on memcontrol.h
eventually, which is not good IMO.
Alternatively, we could
#ifndef MEMCG_KMEM
# define alloc_kmem_pages alloc_pages
#endif
so that we would avoid any additional overhead if kmemcg is compiled out.
However, do we need to bother about one function call at all? My point
is that one function call can be neglected in case of large kmem
allocations, which are rather rare.
Any thoughts/objections?
Thanks.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 32+ messages in thread* Re: [PATCH -mm v2.2] mm: get rid of __GFP_KMEMCG
@ 2014-04-11 17:33 ` Vladimir Davydov
0 siblings, 0 replies; 32+ messages in thread
From: Vladimir Davydov @ 2014-04-11 17:33 UTC (permalink / raw)
To: Christoph Lameter
Cc: akpm, linux-kernel, linux-mm, devel, Greg Thelen, Johannes Weiner,
Michal Hocko, Glauber Costa, Pekka Enberg
On 04/11/2014 08:07 PM, Christoph Lameter wrote:
> On Thu, 3 Apr 2014, Vladimir Davydov wrote:
>
>> --- a/include/linux/slab.h
>> +++ b/include/linux/slab.h
>> @@ -358,16 +358,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
>> #include <linux/slub_def.h>
>> #endif
>>
>> -static __always_inline void *
>> -kmalloc_order(size_t size, gfp_t flags, unsigned int order)
>> -{
>> - void *ret;
>> -
>> - flags |= (__GFP_COMP | __GFP_KMEMCG);
>> - ret = (void *) __get_free_pages(flags, order);
>> - kmemleak_alloc(ret, size, 1, flags);
>> - return ret;
>> -}
>> +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
>
> Hmmm... This was intentional inlined to allow inline expansion for calls
> to kmalloc with large constants. The inline expansion directly converts
> these calls to page allocator calls avoiding slab overhead.
I moved kmalloc_order() to slab_common.c, because I can't call
alloc_kmem_pages() directly from the header (we don't have
page_address() defined there, and including mm.h to slab.h wouldn't be
good I think), and I don't want to introduce __get_free_kmem_pages().
Sorry that I didn't state this explicitly in the comment to the patch.
However, would it be any better if I introduced __get_free_kmem_pages()
and called it from kmalloc_order(), which could be inlined then? I don't
think so, because I would have to place __get_free_kmem_pages() in
page_alloc.c just like __get_free_pages() (again, because including mm.h
to gfp.h for page_address() isn't an option), and we would get exactly
the same number of function calls.
I admit that this patch adds one extra function call to large kmallocs:
- before: __get_free_pages -> alloc_pages
- after: kmalloc_order -> alloc_kmem_pages -> alloc_pages
but that's not because I move kmalloc_order from the header, but rather
because I introduce alloc_kmem_pages, which is not inline.
What can we do to eliminate it? We could place alloc_kmem_pages()
definition to a header file, but since it needs memcontrol.h for kmemcg
charging functions, that would lead to slab.h depending on memcontrol.h
eventually, which is not good IMO.
Alternatively, we could
#ifndef MEMCG_KMEM
# define alloc_kmem_pages alloc_pages
#endif
so that we would avoid any additional overhead if kmemcg is compiled out.
However, do we need to bother about one function call at all? My point
is that one function call can be neglected in case of large kmem
allocations, which are rather rare.
Any thoughts/objections?
Thanks.
^ permalink raw reply [flat|nested] 32+ messages in thread