Linux-RISC-V Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Wenchao Hao <haowenchao22@gmail.com>
To: Albert Ou <aou@eecs.berkeley.edu>,
	Alexandre Ghiti <alex@ghiti.fr>,
	Andrew Morton <akpm@linux-foundation.org>,
	Barry Song <21cnbao@gmail.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-riscv@lists.infradead.org, Minchan Kim <minchan@kernel.org>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Paul Walmsley <pjw@kernel.org>,
	Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Wenchao Hao <haowenchao22@gmail.com>,
	Wenchao Hao <haowenchao@xiaomi.com>
Subject: [RFC PATCH 1/3] mm/zsmalloc: encode class index in obj value for lockless class lookup
Date: Fri,  8 May 2026 14:19:08 +0800	[thread overview]
Message-ID: <20260508061910.3882831-2-haowenchao@xiaomi.com> (raw)
In-Reply-To: <20260508061910.3882831-1-haowenchao@xiaomi.com>

Encode the size class index (class_idx) into the obj value so that
zs_free() can determine the correct size_class without dereferencing
the handle->obj->PFN->zpdesc->zspage->class chain under pool->lock.

OBJ_INDEX_BITS is over-provisioned on 64-bit systems.  For example on
arm64 with default chain_size=8: OBJ_INDEX_BITS=24 but only 10 bits
are actually needed for obj_idx.  We dynamically compute the class
field width, OBJ_CLASS_BITS, as ilog2(ZS_SIZE_CLASSES_FULL - 1) + 1
(8 bits for 4K pages, 9 for 64K) and verify at compile time via
static_assert that the three fields (PFN + class_idx + obj_idx) fit
within BITS_PER_LONG.

This encoding is gated by ZS_OBJ_CLASS_IDX, defined only when
BITS_PER_LONG >= 64.  On 32-bit systems the PFN, class_idx and obj_idx
fields cannot all fit in an unsigned long, so the feature is disabled
and the original OBJ_INDEX_BITS layout is preserved.

Split OBJ_INDEX into class_idx and obj_idx:

  obj: [PFN | class_idx | obj_idx]
       [_PFN_BITS | OBJ_CLASS_BITS | OBJ_IDX_BITS]

class_idx is invariant across page migration (only PFN changes), so a
lockless read always yields a valid class_idx.

Update obj_to_location(), location_to_obj() and callers accordingly.
Add obj_to_class_idx() helper.  Adjust ZS_MIN_ALLOC_SIZE to use
OBJ_IDX_BITS.

Signed-off-by: Wenchao Hao <haowenchao@xiaomi.com>
---
 mm/zsmalloc.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 88 insertions(+), 7 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 63128ddb7959..bccadf0a27f2 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -96,11 +96,74 @@
 #define CLASS_BITS	8
 #define MAGIC_VAL_BITS	8
 
+/*
+ * Optionally encode the size class index in the obj value so that
+ * zs_free() can look up the correct class without holding pool->lock.
+ *
+ * Rather than fixing a hard CLASS_BITS constant for the class_idx field,
+ * we compute the minimum bits needed from the actual number of size classes
+ * and the actual maximum obj_idx, then check whether they all fit:
+ *
+ *   _PFN_BITS + OBJ_CLASS_BITS_NEEDED + OBJ_IDX_BITS_NEEDED <= BITS_PER_LONG
+ *
+ * This naturally handles all architectures and PAGE_SIZE configurations:
+ *
+ *  - 32-bit: BITS_PER_LONG=32, sum easily exceeds 32 --> disabled.
+ *  - powerpc64 64K pages: ZS_SIZE_CLASSES=257 --> OBJ_CLASS_BITS_NEEDED=9,
+ *    but the sum still fits in 64 bits --> enabled.
+ *  - riscv64 Sv57: _PFN_BITS=44, tight but still fits --> enabled.
+ *
+ * When enabled, obj layout is:
+ *
+ *  63                                              0
+ *  +-----------+--------------+-------------+
+ *  |    PFN    |  class_idx   |   obj_idx   |
+ *  | _PFN_BITS |OBJ_CLASS_BITS| OBJ_IDX_BITS|
+ *  +-----------+--------------+-------------+
+ *
+ * Migration only rewrites PFN; class_idx and obj_idx are invariant,
+ * so a lockless read of obj always yields a valid class_idx.
+ */
+
+#if BITS_PER_LONG >= 64
+#define ZS_OBJ_CLASS_IDX
+#endif
+
+#ifdef ZS_OBJ_CLASS_IDX
+
+/* ZS_SIZE_CLASSES computed conservatively with original OBJ_INDEX_BITS */
+#define ZS_MIN_ALLOC_SIZE_FULL \
+	MAX(32, (CONFIG_ZSMALLOC_CHAIN_SIZE << PAGE_SHIFT >> OBJ_INDEX_BITS))
+#define ZS_SIZE_CLASSES_FULL \
+	(DIV_ROUND_UP(PAGE_SIZE - ZS_MIN_ALLOC_SIZE_FULL, \
+		      PAGE_SIZE >> CLASS_BITS) + 1)
+
+#define ZS_MAX_OBJ_COUNT_FULL \
+	(CONFIG_ZSMALLOC_CHAIN_SIZE * PAGE_SIZE / 32)
+#define OBJ_CLASS_BITS_NEEDED	(ilog2(ZS_SIZE_CLASSES_FULL - 1) + 1)
+#define OBJ_IDX_BITS_NEEDED	(ilog2(ZS_MAX_OBJ_COUNT_FULL - 1) + 1)
+
+static_assert(_PFN_BITS + OBJ_CLASS_BITS_NEEDED + OBJ_IDX_BITS_NEEDED
+	      <= BITS_PER_LONG,
+	"zsmalloc: class_idx + obj_idx + PFN do not fit in obj on this config");
+
+#define OBJ_CLASS_BITS		OBJ_CLASS_BITS_NEEDED
+#define OBJ_IDX_BITS		(OBJ_INDEX_BITS - OBJ_CLASS_BITS)
+#define OBJ_IDX_MASK		((_AC(1, UL) << OBJ_IDX_BITS) - 1)
+#define OBJ_CLASS_MASK		((_AC(1, UL) << OBJ_CLASS_BITS) - 1)
+
+#else /* !ZS_OBJ_CLASS_IDX */
+
+#define OBJ_IDX_BITS		OBJ_INDEX_BITS
+#define OBJ_IDX_MASK		OBJ_INDEX_MASK
+
+#endif /* ZS_OBJ_CLASS_IDX */
+
 #define ZS_MAX_PAGES_PER_ZSPAGE	(_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL))
 
 /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
 #define ZS_MIN_ALLOC_SIZE \
-	MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
+	MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_IDX_BITS))
 /* each chunk includes extra space to keep handle */
 #define ZS_MAX_ALLOC_SIZE	PAGE_SIZE
 
@@ -722,7 +785,7 @@ static void obj_to_location(unsigned long obj, struct zpdesc **zpdesc,
 				unsigned int *obj_idx)
 {
 	*zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
-	*obj_idx = (obj & OBJ_INDEX_MASK);
+	*obj_idx = (obj & OBJ_IDX_MASK);
 }
 
 static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
@@ -730,17 +793,29 @@ static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
 	*zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
 }
 
+#ifdef ZS_OBJ_CLASS_IDX
+static unsigned int obj_to_class_idx(unsigned long obj)
+{
+	return (obj >> OBJ_IDX_BITS) & OBJ_CLASS_MASK;
+}
+#endif
+
 /**
- * location_to_obj - get obj value encoded from (<zpdesc>, <obj_idx>)
+ * location_to_obj - encode (<zpdesc>, <obj_idx>, <class_idx>) into obj value
  * @zpdesc: zpdesc object resides in zspage
  * @obj_idx: object index
+ * @class_idx: size class index (used only when ZS_OBJ_CLASS_IDX is defined)
  */
-static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx)
+static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx,
+				     unsigned int class_idx)
 {
 	unsigned long obj;
 
 	obj = zpdesc_pfn(zpdesc) << OBJ_INDEX_BITS;
-	obj |= obj_idx & OBJ_INDEX_MASK;
+#ifdef ZS_OBJ_CLASS_IDX
+	obj |= (unsigned long)(class_idx & OBJ_CLASS_MASK) << OBJ_IDX_BITS;
+#endif
+	obj |= obj_idx & OBJ_IDX_MASK;
 
 	return obj;
 }
@@ -1276,7 +1351,7 @@ static unsigned long obj_malloc(struct zs_pool *pool,
 	kunmap_local(vaddr);
 	mod_zspage_inuse(zspage, 1);
 
-	obj = location_to_obj(m_zpdesc, obj);
+	obj = location_to_obj(m_zpdesc, obj, zspage->class);
 	record_obj(handle, obj);
 
 	return obj;
@@ -1762,7 +1837,13 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 
 			old_obj = handle_to_obj(handle);
 			obj_to_location(old_obj, &dummy, &obj_idx);
-			new_obj = (unsigned long)location_to_obj(newzpdesc, obj_idx);
+#ifdef ZS_OBJ_CLASS_IDX
+			new_obj = (unsigned long)location_to_obj(newzpdesc,
+					obj_idx, obj_to_class_idx(old_obj));
+#else
+			new_obj = (unsigned long)location_to_obj(newzpdesc,
+					obj_idx, 0);
+#endif
 			record_obj(handle, new_obj);
 		}
 	}
-- 
2.34.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

  reply	other threads:[~2026-05-08  6:19 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-08  6:19 [RFC PATCH 0/3] mm/zsmalloc: reduce lock contention in zs_free() Wenchao Hao
2026-05-08  6:19 ` Wenchao Hao [this message]
2026-05-08  6:19 ` [RFC PATCH 2/3] mm/zsmalloc: remove pool->lock from zs_free on 64-bit systems Wenchao Hao
2026-05-08  6:19 ` [RFC PATCH 3/3] mm/zsmalloc: drop class lock before freeing zspage Wenchao Hao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260508061910.3882831-2-haowenchao@xiaomi.com \
    --to=haowenchao22@gmail.com \
    --cc=21cnbao@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex@ghiti.fr \
    --cc=aou@eecs.berkeley.edu \
    --cc=haowenchao@xiaomi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=minchan@kernel.org \
    --cc=palmer@dabbelt.com \
    --cc=pjw@kernel.org \
    --cc=senozhatsky@chromium.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox