* [PATCH 07/11] mm/zsmalloc, zswap: Handle objcg charging and lifetime in zsmalloc
2026-03-11 19:51 [PATCH 00/11] mm/zswap, zsmalloc: Per-memcg-lruvec zswap accounting Joshua Hahn
@ 2026-03-11 19:51 ` Joshua Hahn
2026-03-12 21:42 ` Johannes Weiner
2026-03-11 19:51 ` [PATCH 08/11] mm/memcontrol: Track MEMCG_ZSWAPPED in bytes Joshua Hahn
` (2 subsequent siblings)
3 siblings, 1 reply; 10+ messages in thread
From: Joshua Hahn @ 2026-03-11 19:51 UTC
To: Minchan Kim, Sergey Senozhatsky
Cc: Johannes Weiner, Yosry Ahmed, Nhat Pham, Nhat Pham,
Chengming Zhou, Michal Hocko, Roman Gushchin, Shakeel Butt,
Muchun Song, Andrew Morton, cgroups, linux-mm, linux-kernel,
kernel-team
Now that zswap_entries no longer directly track their obj_cgroups, move
the lifetime management and charging of these objects into the zsmalloc
layer.
One functional change is that zswap entries are no longer charged by
the size of the compressed object, but by the size of the size_class
slot they occupy.
This brings charging one step closer to an accurate representation of
the memory consumed in the zpdesc: even if a compressed object doesn't
fill its object slot completely, the entire slot should be accounted,
since the object makes the rest of it unusable.
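As a concrete illustration (hypothetical numbers; actual size classes
depend on the zsmalloc configuration):

	/*
	 * Suppose a page compresses to 2900 bytes and zsmalloc places
	 * it in a (hypothetical) 2912-byte size class:
	 *
	 *   Before: obj_cgroup_charge_zswap(objcg, entry->length)  2900 bytes
	 *   After:  zs_charge_objcg(pool, objcg, class->size)      2912 bytes
	 *
	 * The 12-byte tail of the slot is unusable by anyone else, so
	 * it is now charged to the owning cgroup as well.
	 */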
While at it, also remove an unnecessary newline in obj_free.
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
include/linux/memcontrol.h | 10 ------
mm/memcontrol.c | 54 ++-----------------------------
mm/zsmalloc.c | 65 ++++++++++++++++++++++++++++++++++++--
mm/zswap.c | 8 -----
4 files changed, 66 insertions(+), 71 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0652db4ff2d5..701d9ab6fef1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1851,22 +1851,12 @@ static inline bool memcg_is_dying(struct mem_cgroup *memcg)
#if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP)
bool obj_cgroup_may_zswap(struct obj_cgroup *objcg);
-void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size);
-void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size);
bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg);
#else
static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
{
return true;
}
-static inline void obj_cgroup_charge_zswap(struct obj_cgroup *objcg,
- size_t size)
-{
-}
-static inline void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg,
- size_t size)
-{
-}
static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
{
/* if zswap is disabled, do not block pages going to the swapping device */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a52da3a5e4fd..68139be66a4f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -716,6 +716,7 @@ void mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
put_cpu();
}
+EXPORT_SYMBOL(mod_memcg_state);
#ifdef CONFIG_MEMCG_V1
/* idx can be of type enum memcg_stat_item or node_stat_item. */
@@ -3169,11 +3170,13 @@ int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
{
return obj_cgroup_charge_account(objcg, gfp, size, NULL, 0);
}
+EXPORT_SYMBOL(obj_cgroup_charge);
void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
{
refill_obj_stock(objcg, size, true, 0, NULL, 0);
}
+EXPORT_SYMBOL(obj_cgroup_uncharge);
static inline size_t obj_full_size(struct kmem_cache *s)
{
@@ -5488,57 +5491,6 @@ bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
return ret;
}
-/**
- * obj_cgroup_charge_zswap - charge compression backend memory
- * @objcg: the object cgroup
- * @size: size of compressed object
- *
- * This forces the charge after obj_cgroup_may_zswap() allowed
- * compression and storage in zswap for this cgroup to go ahead.
- */
-void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
-{
- struct mem_cgroup *memcg;
-
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
- return;
-
- VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
-
- /* PF_MEMALLOC context, charging must succeed */
- if (obj_cgroup_charge(objcg, GFP_KERNEL, size))
- VM_WARN_ON_ONCE(1);
-
- rcu_read_lock();
- memcg = obj_cgroup_memcg(objcg);
- mod_memcg_state(memcg, MEMCG_ZSWAP_B, size);
- mod_memcg_state(memcg, MEMCG_ZSWAPPED, 1);
- rcu_read_unlock();
-}
-
-/**
- * obj_cgroup_uncharge_zswap - uncharge compression backend memory
- * @objcg: the object cgroup
- * @size: size of compressed object
- *
- * Uncharges zswap memory on page in.
- */
-void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
-{
- struct mem_cgroup *memcg;
-
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
- return;
-
- obj_cgroup_uncharge(objcg, size);
-
- rcu_read_lock();
- memcg = obj_cgroup_memcg(objcg);
- mod_memcg_state(memcg, MEMCG_ZSWAP_B, -size);
- mod_memcg_state(memcg, MEMCG_ZSWAPPED, -1);
- rcu_read_unlock();
-}
-
bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
{
/* if zswap is disabled, do not block pages going to the swapping device */
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index a94ca8c26ad9..291194572a09 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1028,6 +1028,59 @@ static bool zspage_empty(struct zspage *zspage)
return get_zspage_inuse(zspage) == 0;
}
+#ifdef CONFIG_MEMCG
+static void zs_charge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
+ int size)
+{
+ struct mem_cgroup *memcg;
+
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ return;
+
+ VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
+ WARN_ON_ONCE(!pool->memcg_aware);
+
+ /* PF_MEMALLOC context, charging must succeed */
+ if (obj_cgroup_charge(objcg, GFP_KERNEL, size))
+ VM_WARN_ON_ONCE(1);
+
+ rcu_read_lock();
+ memcg = obj_cgroup_memcg(objcg);
+ mod_memcg_state(memcg, pool->compressed_stat, size);
+ mod_memcg_state(memcg, pool->uncompressed_stat, 1);
+ rcu_read_unlock();
+}
+
+static void zs_uncharge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
+ int size)
+{
+ struct mem_cgroup *memcg;
+
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ return;
+
+ WARN_ON_ONCE(!pool->memcg_aware);
+
+ obj_cgroup_uncharge(objcg, size);
+
+ rcu_read_lock();
+ memcg = obj_cgroup_memcg(objcg);
+ mod_memcg_state(memcg, pool->compressed_stat, -size);
+ mod_memcg_state(memcg, pool->uncompressed_stat, -1);
+ rcu_read_unlock();
+}
+#else
+static void zs_charge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
+ int size)
+{
+}
+
+static void zs_uncharge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
+ int size)
+{
+}
+#endif
+
/**
* zs_lookup_class_index() - Returns index of the zsmalloc &size_class
* that hold objects of the provided size.
@@ -1244,6 +1297,8 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle,
if (objcg) {
WARN_ON_ONCE(!pool->memcg_aware);
zspage->objcgs[obj_idx] = objcg;
+ obj_cgroup_get(objcg);
+ zs_charge_objcg(pool, objcg, class->size);
}
if (!ZsHugePage(zspage))
@@ -1409,17 +1464,23 @@ static void obj_free(int class_size, unsigned long obj)
struct link_free *link;
struct zspage *zspage;
struct zpdesc *f_zpdesc;
+ struct zs_pool *pool;
unsigned long f_offset;
unsigned int f_objidx;
void *vaddr;
-
obj_to_location(obj, &f_zpdesc, &f_objidx);
f_offset = offset_in_page(class_size * f_objidx);
zspage = get_zspage(f_zpdesc);
+ pool = zspage->pool;
+
+ if (pool->memcg_aware && zspage->objcgs[f_objidx]) {
+ struct obj_cgroup *objcg = zspage->objcgs[f_objidx];
- if (zspage->pool->memcg_aware)
+ zs_uncharge_objcg(pool, objcg, class_size);
+ obj_cgroup_put(objcg);
zspage->objcgs[f_objidx] = NULL;
+ }
vaddr = kmap_local_zpdesc(f_zpdesc);
link = (struct link_free *)(vaddr + f_offset);
diff --git a/mm/zswap.c b/mm/zswap.c
index 436066965413..bca29a6e18f3 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -711,10 +711,6 @@ static void zswap_entry_free(struct zswap_entry *entry)
zswap_lru_del(&zswap_list_lru, entry, objcg);
zs_free(entry->pool->zs_pool, entry->handle);
zswap_pool_put(entry->pool);
- if (objcg) {
- obj_cgroup_uncharge_zswap(objcg, entry->length);
- obj_cgroup_put(objcg);
- }
if (entry->length == PAGE_SIZE)
atomic_long_dec(&zswap_stored_incompressible_pages);
zswap_entry_cache_free(entry);
@@ -1437,10 +1433,6 @@ static bool zswap_store_page(struct page *page,
* when the entry is removed from the tree.
*/
zswap_pool_get(pool);
- if (objcg) {
- obj_cgroup_get(objcg);
- obj_cgroup_charge_zswap(objcg, entry->length);
- }
atomic_long_inc(&zswap_stored_pages);
if (entry->length == PAGE_SIZE)
atomic_long_inc(&zswap_stored_incompressible_pages);
--
2.52.0
* [PATCH 08/11] mm/memcontrol: Track MEMCG_ZSWAPPED in bytes
2026-03-11 19:51 [PATCH 00/11] mm/zswap, zsmalloc: Per-memcg-lruvec zswap accounting Joshua Hahn
2026-03-11 19:51 ` [PATCH 07/11] mm/zsmalloc, zswap: Handle objcg charging and lifetime in zsmalloc Joshua Hahn
@ 2026-03-11 19:51 ` Joshua Hahn
2026-03-11 20:33 ` Nhat Pham
2026-03-11 19:51 ` [PATCH 09/11] mm/vmstat, memcontrol: Track ZSWAP_B, ZSWAPPED_B per-memcg-lruvec Joshua Hahn
2026-03-11 19:54 ` [PATCH 00/11] mm/zswap, zsmalloc: Per-memcg-lruvec zswap accounting Joshua Hahn
3 siblings, 1 reply; 10+ messages in thread
From: Joshua Hahn @ 2026-03-11 19:51 UTC
To: Minchan Kim, Sergey Senozhatsky
Cc: Johannes Weiner, Yosry Ahmed, Nhat Pham, Nhat Pham,
Chengming Zhou, Michal Hocko, Roman Gushchin, Shakeel Butt,
Muchun Song, Andrew Morton, cgroups, linux-mm, linux-kernel,
kernel-team
Zswap compresses and decompresses in PAGE_SIZE units, which simplifies
the accounting of how much memory it has compressed. However, when a
compressed object spans the boundary between two zpdescs, accounting
at PAGE_SIZE granularity makes it difficult to fractionally charge
each backing page with the share of the compressed object that it
backs.
To make sub-PAGE_SIZE granularity charging possible for MEMCG_ZSWAPPED,
track the value in bytes and adjust its accounting accordingly.
No functional changes intended.
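With the unit change, consumers that still want page counts simply
shift back, as the zswap_shrinker_count() hunk below does:

	nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED_B);
	nr_stored >>= PAGE_SHIFT;	/* bytes -> pages */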
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
include/linux/memcontrol.h | 2 +-
mm/memcontrol.c | 5 +++--
mm/zsmalloc.c | 4 ++--
mm/zswap.c | 8 +++++---
4 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 701d9ab6fef1..ce2e598b5963 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -38,7 +38,7 @@ enum memcg_stat_item {
MEMCG_VMALLOC,
MEMCG_KMEM,
MEMCG_ZSWAP_B,
- MEMCG_ZSWAPPED,
+ MEMCG_ZSWAPPED_B,
MEMCG_NR_STAT,
};
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 68139be66a4f..1cb02d2febe8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -342,7 +342,7 @@ static const unsigned int memcg_stat_items[] = {
MEMCG_VMALLOC,
MEMCG_KMEM,
MEMCG_ZSWAP_B,
- MEMCG_ZSWAPPED,
+ MEMCG_ZSWAPPED_B,
};
#define NR_MEMCG_NODE_STAT_ITEMS ARRAY_SIZE(memcg_node_stat_items)
@@ -1364,7 +1364,7 @@ static const struct memory_stat memory_stats[] = {
{ "shmem", NR_SHMEM },
#ifdef CONFIG_ZSWAP
{ "zswap", MEMCG_ZSWAP_B },
- { "zswapped", MEMCG_ZSWAPPED },
+ { "zswapped", MEMCG_ZSWAPPED_B },
#endif
{ "file_mapped", NR_FILE_MAPPED },
{ "file_dirty", NR_FILE_DIRTY },
@@ -1412,6 +1412,7 @@ static int memcg_page_state_unit(int item)
switch (item) {
case MEMCG_PERCPU_B:
case MEMCG_ZSWAP_B:
+ case MEMCG_ZSWAPPED_B:
case NR_SLAB_RECLAIMABLE_B:
case NR_SLAB_UNRECLAIMABLE_B:
return 1;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 291194572a09..24665d7cd4a9 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1047,7 +1047,7 @@ static void zs_charge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
rcu_read_lock();
memcg = obj_cgroup_memcg(objcg);
mod_memcg_state(memcg, pool->compressed_stat, size);
- mod_memcg_state(memcg, pool->uncompressed_stat, 1);
+ mod_memcg_state(memcg, pool->uncompressed_stat, PAGE_SIZE);
rcu_read_unlock();
}
@@ -1066,7 +1066,7 @@ static void zs_uncharge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
rcu_read_lock();
memcg = obj_cgroup_memcg(objcg);
mod_memcg_state(memcg, pool->compressed_stat, -size);
- mod_memcg_state(memcg, pool->uncompressed_stat, -1);
+ mod_memcg_state(memcg, pool->uncompressed_stat, -(int)PAGE_SIZE);
rcu_read_unlock();
}
#else
diff --git a/mm/zswap.c b/mm/zswap.c
index bca29a6e18f3..d81e2db4490b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -257,7 +257,7 @@ static struct zswap_pool *zswap_pool_create(char *compressor)
/* unique name for each pool specifically required by zsmalloc */
snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
pool->zs_pool = zs_create_pool(name, true, MEMCG_ZSWAP_B,
- MEMCG_ZSWAPPED);
+ MEMCG_ZSWAPPED_B);
if (!pool->zs_pool)
goto error;
@@ -1214,8 +1214,10 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
*/
if (!mem_cgroup_disabled()) {
mem_cgroup_flush_stats(memcg);
- nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
- nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
+ nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B);
+ nr_backing >>= PAGE_SHIFT;
+ nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED_B);
+ nr_stored >>= PAGE_SHIFT;
} else {
nr_backing = zswap_total_pages();
nr_stored = atomic_long_read(&zswap_stored_pages);
--
2.52.0
* [PATCH 09/11] mm/vmstat, memcontrol: Track ZSWAP_B, ZSWAPPED_B per-memcg-lruvec
2026-03-11 19:51 [PATCH 00/11] mm/zswap, zsmalloc: Per-memcg-lruvec zswap accounting Joshua Hahn
2026-03-11 19:51 ` [PATCH 07/11] mm/zsmalloc, zswap: Handle objcg charging and lifetime in zsmalloc Joshua Hahn
2026-03-11 19:51 ` [PATCH 08/11] mm/memcontrol: Track MEMCG_ZSWAPPED in bytes Joshua Hahn
@ 2026-03-11 19:51 ` Joshua Hahn
2026-03-11 19:54 ` [PATCH 00/11] mm/zswap, zsmalloc: Per-memcg-lruvec zswap accounting Joshua Hahn
3 siblings, 0 replies; 10+ messages in thread
From: Joshua Hahn @ 2026-03-11 19:51 UTC
To: Minchan Kim, Sergey Senozhatsky
Cc: Johannes Weiner, Yosry Ahmed, Nhat Pham, Nhat Pham,
Chengming Zhou, Michal Hocko, Roman Gushchin, Shakeel Butt,
Muchun Song, Axel Rasmussen, Yuanchu Xie, Wei Xu,
David Hildenbrand, Lorenzo Stoakes, Liam R . Howlett,
Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Andrew Morton,
cgroups, linux-mm, linux-kernel, kernel-team
Now that memcg charging happens in the zsmalloc layer, where we have
both the objcg and the page information, we can specify which node's
memcg lruvec the zswapped memory should be accounted to.
Move MEMCG_ZSWAP_B and MEMCG_ZSWAPPED_B from enum memcg_stat_item to
enum node_stat_item. Rename their prefixes from MEMCG to NR to reflect
this move as well.
In addition, decouple the updates of node stats (vmstat) and
memcg-lruvec stats, since node stats can only track values at a
PAGE_SIZE granularity.
As a result of tracking zswap statistics at a finer granularity, the
charging from zsmalloc also becomes more complicated, to cover the
case where a compressed object spans two zpdescs that live on
different nodes. In this case, the memcg-lruvecs of both node-memcg
combinations are partially charged.
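For example (hypothetical numbers, 4K pages), condensed from the
__zs_mod_memcg_lruvec() hunk below:

	/*
	 * A 3000-byte object starts at offset 2000 in a zpdesc on
	 * node A; the next zpdesc lives on node B:
	 *
	 *   compressed on A:   PAGE_SIZE - offset      = 4096 - 2000 = 2096
	 *   compressed on B:   size - 2096             = 904
	 *   uncompressed on A: PAGE_SIZE * 2096 / 3000 = 2861
	 *   uncompressed on B: PAGE_SIZE - 2861        = 1235
	 *
	 * Each node-memcg lruvec receives its proportional share of
	 * both NR_ZSWAP_B and NR_ZSWAPPED_B.
	 */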
memcg-lruvec stats are now updated precisely and proportionally when
compressed objects are split across pages. Unfortunately for node stats,
only NR_ZSWAP_B can be kept accurate. NR_ZSWAPPED_B works as a good
best-effort value, but cannot proportionally account for compressed
objects split across nodes due to the coarse PAGE_SIZE granularity of
node stats. For such objects, NR_ZSWAPPED_B is accounted to the first
zpdesc's node stats.
Note that this is not a new inaccuracy, but one that these changes
simply cannot fix. The small inaccuracy is accepted in place of
invasive changes across all of the vmstat infrastructure to begin
tracking stats at byte granularity.
Finally, note that objcg migrations across zspages (and their
subsequent migrations across nodes) are handled in the next patch.
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
include/linux/memcontrol.h | 5 +-
include/linux/mmzone.h | 2 +
include/linux/zsmalloc.h | 6 +--
mm/memcontrol.c | 22 ++++----
mm/vmstat.c | 2 +
mm/zsmalloc.c | 104 +++++++++++++++++++++++++++----------
mm/zswap.c | 7 ++-
7 files changed, 102 insertions(+), 46 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ce2e598b5963..b03501e0c09b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -37,8 +37,6 @@ enum memcg_stat_item {
MEMCG_PERCPU_B,
MEMCG_VMALLOC,
MEMCG_KMEM,
- MEMCG_ZSWAP_B,
- MEMCG_ZSWAPPED_B,
MEMCG_NR_STAT,
};
@@ -927,6 +925,9 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
struct mem_cgroup *oom_domain);
void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
+void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ int val);
+
/* idx can be of type enum memcg_stat_item or node_stat_item */
void mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx, int val);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3e51190a55e4..ae16a90491ac 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -258,6 +258,8 @@ enum node_stat_item {
#ifdef CONFIG_HUGETLB_PAGE
NR_HUGETLB,
#endif
+ NR_ZSWAP_B,
+ NR_ZSWAPPED_B,
NR_BALLOON_PAGES,
NR_KERNEL_FILE_PAGES,
NR_VM_NODE_STAT_ITEMS
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 6010d8dac9ff..fd79916c7740 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -24,11 +24,11 @@ struct zs_pool_stats {
struct zs_pool;
struct scatterlist;
struct obj_cgroup;
-enum memcg_stat_item;
+enum node_stat_item;
struct zs_pool *zs_create_pool(const char *name, bool memcg_aware,
- enum memcg_stat_item compressed_stat,
- enum memcg_stat_item uncompressed_stat);
+ enum node_stat_item compressed_stat,
+ enum node_stat_item uncompressed_stat);
void zs_destroy_pool(struct zs_pool *pool);
unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1cb02d2febe8..d87bc4beff16 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -333,6 +333,8 @@ static const unsigned int memcg_node_stat_items[] = {
#ifdef CONFIG_HUGETLB_PAGE
NR_HUGETLB,
#endif
+ NR_ZSWAP_B,
+ NR_ZSWAPPED_B,
};
static const unsigned int memcg_stat_items[] = {
@@ -341,8 +343,6 @@ static const unsigned int memcg_stat_items[] = {
MEMCG_PERCPU_B,
MEMCG_VMALLOC,
MEMCG_KMEM,
- MEMCG_ZSWAP_B,
- MEMCG_ZSWAPPED_B,
};
#define NR_MEMCG_NODE_STAT_ITEMS ARRAY_SIZE(memcg_node_stat_items)
@@ -737,9 +737,8 @@ unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
}
#endif
-static void mod_memcg_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx,
- int val)
+void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ int val)
{
struct mem_cgroup_per_node *pn;
struct mem_cgroup *memcg;
@@ -766,6 +765,7 @@ static void mod_memcg_lruvec_state(struct lruvec *lruvec,
put_cpu();
}
+EXPORT_SYMBOL(mod_memcg_lruvec_state);
/**
* mod_lruvec_state - update lruvec memory statistics
@@ -1363,8 +1363,8 @@ static const struct memory_stat memory_stats[] = {
{ "vmalloc", MEMCG_VMALLOC },
{ "shmem", NR_SHMEM },
#ifdef CONFIG_ZSWAP
- { "zswap", MEMCG_ZSWAP_B },
- { "zswapped", MEMCG_ZSWAPPED_B },
+ { "zswap", NR_ZSWAP_B },
+ { "zswapped", NR_ZSWAPPED_B },
#endif
{ "file_mapped", NR_FILE_MAPPED },
{ "file_dirty", NR_FILE_DIRTY },
@@ -1411,8 +1411,8 @@ static int memcg_page_state_unit(int item)
{
switch (item) {
case MEMCG_PERCPU_B:
- case MEMCG_ZSWAP_B:
- case MEMCG_ZSWAPPED_B:
+ case NR_ZSWAP_B:
+ case NR_ZSWAPPED_B:
case NR_SLAB_RECLAIMABLE_B:
case NR_SLAB_UNRECLAIMABLE_B:
return 1;
@@ -5482,7 +5482,7 @@ bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
/* Force flush to get accurate stats for charging */
__mem_cgroup_flush_stats(memcg, true);
- pages = memcg_page_state(memcg, MEMCG_ZSWAP_B) / PAGE_SIZE;
+ pages = memcg_page_state(memcg, NR_ZSWAP_B) / PAGE_SIZE;
if (pages < max)
continue;
ret = false;
@@ -5511,7 +5511,7 @@ static u64 zswap_current_read(struct cgroup_subsys_state *css,
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
mem_cgroup_flush_stats(memcg);
- return memcg_page_state(memcg, MEMCG_ZSWAP_B);
+ return memcg_page_state(memcg, NR_ZSWAP_B);
}
static int zswap_max_show(struct seq_file *m, void *v)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 86b14b0f77b5..389ff986ceac 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1279,6 +1279,8 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_HUGETLB_PAGE
[I(NR_HUGETLB)] = "nr_hugetlb",
#endif
+ [I(NR_ZSWAP_B)] = "zswap",
+ [I(NR_ZSWAPPED_B)] = "zswapped",
[I(NR_BALLOON_PAGES)] = "nr_balloon_pages",
[I(NR_KERNEL_FILE_PAGES)] = "nr_kernel_file_pages",
#undef I
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 24665d7cd4a9..ab085961b0e2 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -216,8 +216,8 @@ struct zs_pool {
struct work_struct free_work;
#endif
bool memcg_aware;
- enum memcg_stat_item compressed_stat;
- enum memcg_stat_item uncompressed_stat;
+ enum node_stat_item compressed_stat;
+ enum node_stat_item uncompressed_stat;
/* protect zspage migration/compaction */
rwlock_t lock;
atomic_t compaction_in_progress;
@@ -823,6 +823,9 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
reset_zpdesc(zpdesc);
zpdesc_unlock(zpdesc);
zpdesc_dec_zone_page_state(zpdesc);
+ if (pool->memcg_aware)
+ dec_node_page_state(zpdesc_page(zpdesc),
+ pool->compressed_stat);
zpdesc_put(zpdesc);
zpdesc = next;
} while (zpdesc != NULL);
@@ -974,6 +977,9 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
__zpdesc_set_zsmalloc(zpdesc);
zpdesc_inc_zone_page_state(zpdesc);
+ if (pool->memcg_aware)
+ inc_node_page_state(zpdesc_page(zpdesc),
+ pool->compressed_stat);
zpdescs[i] = zpdesc;
}
@@ -985,6 +991,9 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
err:
while (--i >= 0) {
zpdesc_dec_zone_page_state(zpdescs[i]);
+ if (pool->memcg_aware)
+ dec_node_page_state(zpdesc_page(zpdescs[i]),
+ pool->compressed_stat);
free_zpdesc(zpdescs[i]);
}
if (pool->memcg_aware)
@@ -1029,10 +1038,48 @@ static bool zspage_empty(struct zspage *zspage)
}
#ifdef CONFIG_MEMCG
-static void zs_charge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
- int size)
+static void __zs_mod_memcg_lruvec(struct zs_pool *pool, struct zpdesc *zpdesc,
+ struct obj_cgroup *objcg, int size,
+ int sign, unsigned long offset)
{
struct mem_cgroup *memcg;
+ struct lruvec *lruvec;
+ int compressed_size = size, original_size = PAGE_SIZE;
+ int nid = page_to_nid(zpdesc_page(zpdesc));
+ int next_nid = nid;
+
+ if (offset + size > PAGE_SIZE) {
+ struct zpdesc *next_zpdesc = get_next_zpdesc(zpdesc);
+
+ next_nid = page_to_nid(zpdesc_page(next_zpdesc));
+ if (nid != next_nid) {
+ compressed_size = PAGE_SIZE - offset;
+ original_size = (PAGE_SIZE * compressed_size) / size;
+ }
+ }
+
+ rcu_read_lock();
+ memcg = obj_cgroup_memcg(objcg);
+ lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+ mod_memcg_lruvec_state(lruvec, pool->compressed_stat,
+ sign * compressed_size);
+ mod_memcg_lruvec_state(lruvec, pool->uncompressed_stat,
+ sign * original_size);
+
+ if (nid != next_nid) {
+ lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(next_nid));
+ mod_memcg_lruvec_state(lruvec, pool->compressed_stat,
+ sign * (size - compressed_size));
+ mod_memcg_lruvec_state(lruvec, pool->uncompressed_stat,
+ sign * (PAGE_SIZE - original_size));
+ }
+ rcu_read_unlock();
+}
+
+static void zs_charge_objcg(struct zs_pool *pool, struct zpdesc *zpdesc,
+ struct obj_cgroup *objcg, int size,
+ unsigned long offset)
+{
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return;
@@ -1044,18 +1091,19 @@ static void zs_charge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
if (obj_cgroup_charge(objcg, GFP_KERNEL, size))
VM_WARN_ON_ONCE(1);
- rcu_read_lock();
- memcg = obj_cgroup_memcg(objcg);
- mod_memcg_state(memcg, pool->compressed_stat, size);
- mod_memcg_state(memcg, pool->uncompressed_stat, PAGE_SIZE);
- rcu_read_unlock();
+ __zs_mod_memcg_lruvec(pool, zpdesc, objcg, size, 1, offset);
+
+ /*
+ * Node-level vmstats are charged in PAGE_SIZE units. As a best-effort,
+ * always charge the uncompressed stats to the first zpdesc.
+ */
+ inc_node_page_state(zpdesc_page(zpdesc), pool->uncompressed_stat);
}
-static void zs_uncharge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
- int size)
+static void zs_uncharge_objcg(struct zs_pool *pool, struct zpdesc *zpdesc,
+ struct obj_cgroup *objcg, int size,
+ unsigned long offset)
{
- struct mem_cgroup *memcg;
-
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return;
@@ -1063,20 +1111,24 @@ static void zs_uncharge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
obj_cgroup_uncharge(objcg, size);
- rcu_read_lock();
- memcg = obj_cgroup_memcg(objcg);
- mod_memcg_state(memcg, pool->compressed_stat, -size);
- mod_memcg_state(memcg, pool->uncompressed_stat, -(int)PAGE_SIZE);
- rcu_read_unlock();
+ __zs_mod_memcg_lruvec(pool, zpdesc, objcg, size, -1, offset);
+
+ /*
+ * Node-level vmstats are charged in PAGE_SIZE units. As a best-effort,
+ * always uncharge the uncompressed stats from the first zpdesc.
+ */
+ dec_node_page_state(zpdesc_page(zpdesc), pool->uncompressed_stat);
}
#else
-static void zs_charge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
- int size)
+static void zs_charge_objcg(struct zs_pool *pool, struct zpdesc *zpdesc,
+ struct obj_cgroup *objcg, int size,
+ unsigned long offset)
{
}
-static void zs_uncharge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
- int size)
+static void zs_uncharge_objcg(struct zs_pool *pool, struct zpdesc *zpdesc,
+ struct obj_cgroup *objcg, int size,
+ unsigned long offset)
{
}
#endif
@@ -1298,7 +1350,7 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle,
WARN_ON_ONCE(!pool->memcg_aware);
zspage->objcgs[obj_idx] = objcg;
obj_cgroup_get(objcg);
- zs_charge_objcg(pool, objcg, class->size);
+ zs_charge_objcg(pool, zpdesc, objcg, class->size, off);
}
if (!ZsHugePage(zspage))
@@ -1477,7 +1529,7 @@ static void obj_free(int class_size, unsigned long obj)
if (pool->memcg_aware && zspage->objcgs[f_objidx]) {
struct obj_cgroup *objcg = zspage->objcgs[f_objidx];
- zs_uncharge_objcg(pool, objcg, class_size);
+ zs_uncharge_objcg(pool, f_zpdesc, objcg, class_size, f_offset);
obj_cgroup_put(objcg);
zspage->objcgs[f_objidx] = NULL;
}
@@ -2191,8 +2243,8 @@ static int calculate_zspage_chain_size(int class_size)
* otherwise NULL.
*/
struct zs_pool *zs_create_pool(const char *name, bool memcg_aware,
- enum memcg_stat_item compressed_stat,
- enum memcg_stat_item uncompressed_stat)
+ enum node_stat_item compressed_stat,
+ enum node_stat_item uncompressed_stat)
{
int i;
struct zs_pool *pool;
diff --git a/mm/zswap.c b/mm/zswap.c
index d81e2db4490b..2e9352b46693 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -256,8 +256,7 @@ static struct zswap_pool *zswap_pool_create(char *compressor)
/* unique name for each pool specifically required by zsmalloc */
snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
- pool->zs_pool = zs_create_pool(name, true, MEMCG_ZSWAP_B,
- MEMCG_ZSWAPPED_B);
+ pool->zs_pool = zs_create_pool(name, true, NR_ZSWAP_B, NR_ZSWAPPED_B);
if (!pool->zs_pool)
goto error;
@@ -1214,9 +1213,9 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
*/
if (!mem_cgroup_disabled()) {
mem_cgroup_flush_stats(memcg);
- nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B);
+ nr_backing = memcg_page_state(memcg, NR_ZSWAP_B);
nr_backing >>= PAGE_SHIFT;
- nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED_B);
+ nr_stored = memcg_page_state(memcg, NR_ZSWAPPED_B);
nr_stored >>= PAGE_SHIFT;
} else {
nr_backing = zswap_total_pages();
--
2.52.0