From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Chris Li <chrisl@kernel.org>, Hugh Dickins <hughd@google.com>,
"Huang, Ying" <ying.huang@intel.com>,
Yosry Ahmed <yosryahmed@google.com>,
Roman Gushchin <roman.gushchin@linux.dev>,
Shakeel Butt <shakeel.butt@linux.dev>,
Johannes Weiner <hannes@cmpxchg.org>,
Barry Song <baohua@kernel.org>, Michal Hocko <mhocko@kernel.org>,
linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 4/4] mm, swap_cgroup: remove global swap cgroup lock
Date: Tue, 3 Dec 2024 02:41:54 +0800 [thread overview]
Message-ID: <20241202184154.19321-5-ryncsn@gmail.com> (raw)
In-Reply-To: <20241202184154.19321-1-ryncsn@gmail.com>
From: Kairui Song <kasong@tencent.com>
commit e9e58a4ec3b1 ("memcg: avoid use cmpxchg in swap cgroup maintainance")
replaced the cmpxchg/xchg with a global irq spinlock because some archs
don't support 2-byte cmpxchg/xchg. Clearly this won't scale well.
And as commented in swap_cgroup.c, this lock is not needed for map
synchronization.
Emulating 2-byte cmpxchg/xchg with atomic_t isn't hard, so implement
it to get rid of this lock.
Testing using a 64G brd and building the kernel with make -j96 in a 1.5G
memory cgroup using 4k folios showed the improvement below (10 test runs):
Before this series:
Sys time: 10730.08 (stdev 49.030728)
Real time: 171.03 (stdev 0.850355)
After this commit:
Sys time: 9612.24 (stdev 66.310789), -10.42%
Real time: 159.78 (stdev 0.577193), -6.57%
With 64k folios and 2G memcg:
Before this series:
Sys time: 7626.77 (stdev 43.545517)
Real time: 136.22 (stdev 1.265544)
After this commit:
Sys time: 6936.03 (stdev 39.996280), -9.06%
Real time: 129.65 (stdev 0.880039), -4.82%
Sequential swapout of 8G 4k zero folios (24 test runs):
Before this series:
5461409.12 us (stdev 183957.827084)
After this commit:
5420447.26 us (stdev 196419.240317)
Sequential swapin of 8G 4k zero folios (24 test runs):
Before this series:
19736958.916667 us (stdev 189027.246676)
After this commit:
19662182.629630 us (stdev 172717.640614)
Performance is better or at least not worse for all tests above.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/swap_cgroup.c | 56 +++++++++++++++++++++++++++++++++++-------------
1 file changed, 41 insertions(+), 15 deletions(-)
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index a76afdc3666a..028f5e6be3f0 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -5,6 +5,15 @@
#include <linux/swapops.h> /* depends on mm.h include */
+#define ID_PER_UNIT (sizeof(atomic_t) / sizeof(unsigned short))
+struct swap_cgroup_unit {
+ union {
+ int raw;
+ atomic_t val;
+ unsigned short __id[ID_PER_UNIT];
+ };
+};
+
static DEFINE_MUTEX(swap_cgroup_mutex);
struct swap_cgroup {
@@ -12,8 +21,10 @@ struct swap_cgroup {
};
struct swap_cgroup_ctrl {
- unsigned short *map;
- spinlock_t lock;
+ union {
+ struct swap_cgroup_unit *units;
+ unsigned short *map;
+ };
};
static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
@@ -31,6 +42,24 @@ static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
*
* TODO: we can push these buffers out to HIGHMEM.
*/
+static unsigned short __swap_cgroup_xchg(void *map,
+ pgoff_t offset,
+ unsigned int new_id)
+{
+ unsigned int old_id;
+ struct swap_cgroup_unit *units = map;
+ struct swap_cgroup_unit *unit = &units[offset / ID_PER_UNIT];
+ struct swap_cgroup_unit new, old = { .raw = atomic_read(&unit->val) };
+
+ do {
+ new.raw = old.raw;
+ old_id = old.__id[offset % ID_PER_UNIT];
+ new.__id[offset % ID_PER_UNIT] = new_id;
+ } while (!atomic_try_cmpxchg(&unit->val, &old.raw, new.raw));
+
+ return old_id;
+}
+
/**
* swap_cgroup_record - record mem_cgroup for a set of swap entries
* @ent: the first swap entry to be recorded into
@@ -44,22 +73,19 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
unsigned int nr_ents)
{
struct swap_cgroup_ctrl *ctrl;
- unsigned short *map;
- unsigned short old;
- unsigned long flags;
pgoff_t offset = swp_offset(ent);
pgoff_t end = offset + nr_ents;
+ unsigned short old, iter;
+ unsigned short *map;
ctrl = &swap_cgroup_ctrl[swp_type(ent)];
map = ctrl->map;
- spin_lock_irqsave(&ctrl->lock, flags);
- old = map[offset];
+ old = READ_ONCE(map[offset]);
do {
- VM_BUG_ON(map[offset] != old);
- map[offset] = id;
+ iter = __swap_cgroup_xchg(map, offset, id);
+ VM_BUG_ON(iter != old);
} while (++offset != end);
- spin_unlock_irqrestore(&ctrl->lock, flags);
return old;
}
@@ -85,20 +111,20 @@ unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
int swap_cgroup_swapon(int type, unsigned long max_pages)
{
- void *map;
+ struct swap_cgroup_unit *units;
struct swap_cgroup_ctrl *ctrl;
if (mem_cgroup_disabled())
return 0;
- map = vzalloc(max_pages * sizeof(unsigned short));
- if (!map)
+ units = vzalloc(DIV_ROUND_UP(max_pages, ID_PER_UNIT) *
+ sizeof(struct swap_cgroup_unit));
+ if (!units)
goto nomem;
ctrl = &swap_cgroup_ctrl[type];
mutex_lock(&swap_cgroup_mutex);
- ctrl->map = map;
- spin_lock_init(&ctrl->lock);
+ ctrl->units = units;
mutex_unlock(&swap_cgroup_mutex);
return 0;
--
2.47.0
next prev parent reply other threads:[~2024-12-02 18:42 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-02 18:41 [PATCH 0/4] mm/swap_cgroup: remove global swap cgroup lock Kairui Song
2024-12-02 18:41 ` [PATCH 1/4] mm, memcontrol: avoid duplicated memcg enable check Kairui Song
2024-12-02 19:10 ` Yosry Ahmed
2024-12-03 8:25 ` Kairui Song
2024-12-03 18:28 ` Chris Li
2024-12-04 17:05 ` Shakeel Butt
2024-12-02 21:37 ` Shakeel Butt
2024-12-02 22:27 ` Roman Gushchin
2024-12-03 0:24 ` Barry Song
2024-12-03 2:03 ` kernel test robot
2024-12-03 5:42 ` kernel test robot
2024-12-02 18:41 ` [PATCH 2/4] mm/swap_cgroup: remove swap_cgroup_cmpxchg Kairui Song
2024-12-02 19:11 ` Yosry Ahmed
2024-12-02 21:38 ` Shakeel Butt
2024-12-02 22:28 ` Roman Gushchin
2024-12-03 18:29 ` Chris Li
2024-12-02 18:41 ` [PATCH 3/4] mm/swap_cgroup: simplify swap cgroup definitions Kairui Song
2024-12-02 19:25 ` Yosry Ahmed
2024-12-04 21:14 ` Chris Li
2024-12-10 8:15 ` Kairui Song
2024-12-02 22:34 ` Roman Gushchin
2024-12-02 18:41 ` Kairui Song [this message]
2024-12-02 19:28 ` [PATCH 4/4] mm, swap_cgroup: remove global swap cgroup lock Yosry Ahmed
2024-12-02 20:35 ` Yosry Ahmed
2024-12-03 18:20 ` Kairui Song
2024-12-03 19:17 ` Yosry Ahmed
2024-12-04 17:58 ` Kairui Song
2024-12-04 18:57 ` Yosry Ahmed
2024-12-02 19:37 ` Yosry Ahmed
2024-12-04 19:34 ` Chris Li
2024-12-10 7:05 ` Kairui Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241202184154.19321-5-ryncsn@gmail.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=chrisl@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=roman.gushchin@linux.dev \
--cc=shakeel.butt@linux.dev \
--cc=ying.huang@intel.com \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.