From: Wenchao Hao <haowenchao22@gmail.com>
To: Albert Ou <aou@eecs.berkeley.edu>,
Alexandre Ghiti <alex@ghiti.fr>,
Andrew Morton <akpm@linux-foundation.org>,
Barry Song <21cnbao@gmail.com>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-riscv@lists.infradead.org, Minchan Kim <minchan@kernel.org>,
Palmer Dabbelt <palmer@dabbelt.com>,
Paul Walmsley <pjw@kernel.org>,
Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Wenchao Hao <haowenchao22@gmail.com>,
Wenchao Hao <haowenchao@xiaomi.com>
Subject: [RFC PATCH 1/3] mm/zsmalloc: encode class index in obj value for lockless class lookup
Date: Fri, 8 May 2026 14:19:08 +0800 [thread overview]
Message-ID: <20260508061910.3882831-2-haowenchao@xiaomi.com> (raw)
In-Reply-To: <20260508061910.3882831-1-haowenchao@xiaomi.com>
Encode the size class index (class_idx) into the obj value so that
zs_free() can determine the correct size_class without dereferencing
the handle->obj->PFN->zpdesc->zspage->class chain under pool->lock.
OBJ_INDEX_BITS is over-provisioned on 64-bit systems. For example on
arm64 with default chain_size=8: OBJ_INDEX_BITS=24 but only 10 bits
are actually needed for obj_idx. We compute the minimum class-index width
at compile time as ilog2(ZS_SIZE_CLASSES - 1) + 1 (8 bits for 4K pages,
9 bits for 64K pages) and verify via static_assert that the three fields
(PFN + class_idx + obj_idx) together fit within BITS_PER_LONG.
This encoding is gated by ZS_OBJ_CLASS_IDX, defined only when
BITS_PER_LONG >= 64. On 32-bit systems the bits do not fit, so
the feature is disabled and the original OBJ_INDEX layout is preserved.
Split OBJ_INDEX into class_idx and obj_idx:
obj: [    PFN    |   class_idx    |   obj_idx    ]
     [ _PFN_BITS | OBJ_CLASS_BITS | OBJ_IDX_BITS ]
class_idx is invariant across page migration (only PFN changes), so a
lockless read always yields a valid class_idx.
Update obj_to_location(), location_to_obj() and callers accordingly.
Add obj_to_class_idx() helper. Adjust ZS_MIN_ALLOC_SIZE to use
OBJ_IDX_BITS.
Signed-off-by: Wenchao Hao <haowenchao@xiaomi.com>
---
mm/zsmalloc.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 88 insertions(+), 7 deletions(-)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 63128ddb7959..bccadf0a27f2 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -96,11 +96,74 @@
#define CLASS_BITS 8
#define MAGIC_VAL_BITS 8
+/*
+ * Optionally encode the size class index in the obj value so that
+ * zs_free() can look up the correct class without holding pool->lock.
+ *
+ * Rather than fixing a hard CLASS_BITS constant for the class_idx field,
+ * we compute the minimum bits needed from the actual number of size classes
+ * and the actual maximum obj_idx, then check whether they all fit:
+ *
+ * _PFN_BITS + OBJ_CLASS_BITS_NEEDED + OBJ_IDX_BITS_NEEDED <= BITS_PER_LONG
+ *
+ * This naturally handles all architectures and PAGE_SIZE configurations:
+ *
+ * - 32-bit: BITS_PER_LONG=32, sum easily exceeds 32 --> disabled.
+ * - powerpc64 64K pages: ZS_SIZE_CLASSES=257 --> OBJ_CLASS_BITS_NEEDED=9,
+ * but the sum still fits in 64 bits --> enabled.
+ * - riscv64 Sv57: _PFN_BITS=44, tight but still fits --> enabled.
+ *
+ * When enabled, obj layout is:
+ *
+ * 63 0
+ * +-----------+--------------+-------------+
+ * | PFN | class_idx | obj_idx |
+ * | _PFN_BITS |OBJ_CLASS_BITS| OBJ_IDX_BITS|
+ * +-----------+--------------+-------------+
+ *
+ * Migration only rewrites PFN; class_idx and obj_idx are invariant,
+ * so a lockless read of obj always yields a valid class_idx.
+ */
+
+#if BITS_PER_LONG >= 64
+#define ZS_OBJ_CLASS_IDX
+#endif
+
+#ifdef ZS_OBJ_CLASS_IDX
+
+/* ZS_SIZE_CLASSES computed conservatively with original OBJ_INDEX_BITS */
+#define ZS_MIN_ALLOC_SIZE_FULL \
+ MAX(32, (CONFIG_ZSMALLOC_CHAIN_SIZE << PAGE_SHIFT >> OBJ_INDEX_BITS))
+#define ZS_SIZE_CLASSES_FULL \
+ (DIV_ROUND_UP(PAGE_SIZE - ZS_MIN_ALLOC_SIZE_FULL, \
+ PAGE_SIZE >> CLASS_BITS) + 1)
+
+#define ZS_MAX_OBJ_COUNT_FULL \
+ (CONFIG_ZSMALLOC_CHAIN_SIZE * PAGE_SIZE / 32)
+#define OBJ_CLASS_BITS_NEEDED (ilog2(ZS_SIZE_CLASSES_FULL - 1) + 1)
+#define OBJ_IDX_BITS_NEEDED (ilog2(ZS_MAX_OBJ_COUNT_FULL - 1) + 1)
+
+static_assert(_PFN_BITS + OBJ_CLASS_BITS_NEEDED + OBJ_IDX_BITS_NEEDED
+ <= BITS_PER_LONG,
+ "zsmalloc: class_idx + obj_idx + PFN do not fit in obj on this config");
+
+#define OBJ_CLASS_BITS OBJ_CLASS_BITS_NEEDED
+#define OBJ_IDX_BITS (OBJ_INDEX_BITS - OBJ_CLASS_BITS)
+#define OBJ_IDX_MASK ((_AC(1, UL) << OBJ_IDX_BITS) - 1)
+#define OBJ_CLASS_MASK ((_AC(1, UL) << OBJ_CLASS_BITS) - 1)
+
+#else /* !ZS_OBJ_CLASS_IDX */
+
+#define OBJ_IDX_BITS OBJ_INDEX_BITS
+#define OBJ_IDX_MASK OBJ_INDEX_MASK
+
+#endif /* ZS_OBJ_CLASS_IDX */
+
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL))
/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE \
- MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
+ MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_IDX_BITS))
/* each chunk includes extra space to keep handle */
#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
@@ -722,7 +785,7 @@ static void obj_to_location(unsigned long obj, struct zpdesc **zpdesc,
unsigned int *obj_idx)
{
*zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
- *obj_idx = (obj & OBJ_INDEX_MASK);
+ *obj_idx = (obj & OBJ_IDX_MASK);
}
static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
@@ -730,17 +793,29 @@ static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
*zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
}
+#ifdef ZS_OBJ_CLASS_IDX
+static unsigned int obj_to_class_idx(unsigned long obj)
+{
+ return (obj >> OBJ_IDX_BITS) & OBJ_CLASS_MASK;
+}
+#endif
+
/**
- * location_to_obj - get obj value encoded from (<zpdesc>, <obj_idx>)
+ * location_to_obj - encode (<zpdesc>, <obj_idx>, <class_idx>) into obj value
* @zpdesc: zpdesc object resides in zspage
* @obj_idx: object index
+ * @class_idx: size class index (used only when ZS_OBJ_CLASS_IDX is defined)
*/
-static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx)
+static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx,
+ unsigned int class_idx)
{
unsigned long obj;
obj = zpdesc_pfn(zpdesc) << OBJ_INDEX_BITS;
- obj |= obj_idx & OBJ_INDEX_MASK;
+#ifdef ZS_OBJ_CLASS_IDX
+ obj |= (unsigned long)(class_idx & OBJ_CLASS_MASK) << OBJ_IDX_BITS;
+#endif
+ obj |= obj_idx & OBJ_IDX_MASK;
return obj;
}
@@ -1276,7 +1351,7 @@ static unsigned long obj_malloc(struct zs_pool *pool,
kunmap_local(vaddr);
mod_zspage_inuse(zspage, 1);
- obj = location_to_obj(m_zpdesc, obj);
+ obj = location_to_obj(m_zpdesc, obj, zspage->class);
record_obj(handle, obj);
return obj;
@@ -1762,7 +1837,13 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
old_obj = handle_to_obj(handle);
obj_to_location(old_obj, &dummy, &obj_idx);
- new_obj = (unsigned long)location_to_obj(newzpdesc, obj_idx);
+#ifdef ZS_OBJ_CLASS_IDX
+ new_obj = (unsigned long)location_to_obj(newzpdesc,
+ obj_idx, obj_to_class_idx(old_obj));
+#else
+ new_obj = (unsigned long)location_to_obj(newzpdesc,
+ obj_idx, 0);
+#endif
record_obj(handle, new_obj);
}
}
--
2.34.1
next prev parent reply other threads:[~2026-05-08 6:19 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-08 6:19 [RFC PATCH 0/3] mm/zsmalloc: reduce lock contention in zs_free() Wenchao Hao
2026-05-08 6:19 ` Wenchao Hao [this message]
2026-05-08 6:19 ` [RFC PATCH 2/3] mm/zsmalloc: remove pool->lock from zs_free on 64-bit systems Wenchao Hao
2026-05-08 6:19 ` [RFC PATCH 3/3] mm/zsmalloc: drop class lock before freeing zspage Wenchao Hao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260508061910.3882831-2-haowenchao@xiaomi.com \
--to=haowenchao22@gmail.com \
--cc=21cnbao@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=alex@ghiti.fr \
--cc=aou@eecs.berkeley.edu \
--cc=haowenchao@xiaomi.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-riscv@lists.infradead.org \
--cc=minchan@kernel.org \
--cc=palmer@dabbelt.com \
--cc=pjw@kernel.org \
--cc=senozhatsky@chromium.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox