From: Nhat Pham <nphamcs@gmail.com>
To: kasong@tencent.com
Cc: Liam.Howlett@oracle.com, akpm@linux-foundation.org,
apopple@nvidia.com, axelrasmussen@google.com, baohua@kernel.org,
baolin.wang@linux.alibaba.com, bhe@redhat.com, byungchul@sk.com,
cgroups@vger.kernel.org, chengming.zhou@linux.dev,
chrisl@kernel.org, corbet@lwn.net, david@kernel.org,
dev.jain@arm.com, gourry@gourry.net, hannes@cmpxchg.org,
hughd@google.com, jannh@google.com, joshua.hahnjy@gmail.com,
lance.yang@linux.dev, lenb@kernel.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-pm@vger.kernel.org, lorenzo.stoakes@oracle.com,
matthew.brost@intel.com, mhocko@suse.com, muchun.song@linux.dev,
npache@redhat.com, nphamcs@gmail.com, pavel@kernel.org,
peterx@redhat.com, peterz@infradead.org, pfalcato@suse.de,
rafael@kernel.org, rakie.kim@sk.com, roman.gushchin@linux.dev,
rppt@kernel.org, ryan.roberts@arm.com, shakeel.butt@linux.dev,
shikemeng@huaweicloud.com, surenb@google.com, tglx@kernel.org,
vbabka@suse.cz, weixugc@google.com, ying.huang@linux.alibaba.com,
yosry.ahmed@linux.dev, yuanchu@google.com,
zhengqi.arch@bytedance.com, ziy@nvidia.com, kernel-team@meta.com,
riel@surriel.com
Subject: [PATCH v5 20/21] swapfile: replace the swap map with bitmaps
Date: Fri, 20 Mar 2026 12:27:34 -0700 [thread overview]
Message-ID: <20260320192735.748051-21-nphamcs@gmail.com> (raw)
In-Reply-To: <20260320192735.748051-1-nphamcs@gmail.com>
Now that we have moved the swap count state to the virtual swap layer,
each swap map entry only has 3 possible states: free, allocated, and bad.
Replace the swap map with 2 bitmaps (one for the allocated state and one
for the bad state), saving 6 bits per swap entry.
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
include/linux/swap.h | 3 +-
mm/swapfile.c | 81 +++++++++++++++++++++++---------------------
2 files changed, 44 insertions(+), 40 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 21e528d8d3480..3c789149996c5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -259,7 +259,8 @@ struct swap_info_struct {
struct plist_node list; /* entry in swap_active_head */
signed char type; /* strange name for an index */
unsigned int max; /* extent of the swap_map */
- unsigned char *swap_map; /* vmalloc'ed array of usage counts */
+ unsigned long *swap_map; /* bitmap for allocated state */
+ unsigned long *bad_map; /* bitmap for bad state */
struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
struct list_head free_clusters; /* free clusters list */
struct list_head full_clusters; /* full clusters list */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b553652125d11..3e2bfcf1aa789 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -760,25 +760,19 @@ static bool cluster_reclaim_range(struct swap_info_struct *si,
struct swap_cluster_info *ci,
unsigned long start, unsigned long end)
{
- unsigned char *map = si->swap_map;
unsigned long offset = start;
int nr_reclaim;
spin_unlock(&ci->lock);
do {
- switch (READ_ONCE(map[offset])) {
- case 0:
+ if (!test_bit(offset, si->swap_map)) {
offset++;
- break;
- case SWAP_MAP_ALLOCATED:
+ } else {
nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY);
if (nr_reclaim > 0)
offset += nr_reclaim;
else
goto out;
- break;
- default:
- goto out;
}
} while (offset < end);
out:
@@ -787,11 +781,7 @@ static bool cluster_reclaim_range(struct swap_info_struct *si,
* Recheck the range no matter reclaim succeeded or not, the slot
* could have been be freed while we are not holding the lock.
*/
- for (offset = start; offset < end; offset++)
- if (READ_ONCE(map[offset]))
- return false;
-
- return true;
+ return find_next_bit(si->swap_map, end, start) >= end;
}
static bool cluster_scan_range(struct swap_info_struct *si,
@@ -800,15 +790,16 @@ static bool cluster_scan_range(struct swap_info_struct *si,
bool *need_reclaim)
{
unsigned long offset, end = start + nr_pages;
- unsigned char *map = si->swap_map;
- unsigned char count;
if (cluster_is_empty(ci))
return true;
for (offset = start; offset < end; offset++) {
- count = READ_ONCE(map[offset]);
- if (!count)
+ /* Bad slots cannot be used for allocation */
+ if (test_bit(offset, si->bad_map))
+ return false;
+
+ if (!test_bit(offset, si->swap_map))
continue;
if (swap_cache_only(si, offset)) {
@@ -841,7 +832,7 @@ static bool cluster_alloc_range(struct swap_info_struct *si, struct swap_cluster
if (cluster_is_empty(ci))
ci->order = order;
- memset(si->swap_map + start, usage, nr_pages);
+ bitmap_set(si->swap_map, start, nr_pages);
swap_range_alloc(si, nr_pages);
ci->count += nr_pages;
@@ -1407,7 +1398,7 @@ static struct swap_info_struct *_swap_info_get(swp_slot_t slot)
offset = swp_slot_offset(slot);
if (offset >= si->max)
goto bad_offset;
- if (data_race(!si->swap_map[swp_slot_offset(slot)]))
+ if (data_race(!test_bit(offset, si->swap_map)))
goto bad_free;
return si;
@@ -1521,8 +1512,7 @@ static void swap_slots_free(struct swap_info_struct *si,
swp_slot_t slot, unsigned int nr_pages)
{
unsigned long offset = swp_slot_offset(slot);
- unsigned char *map = si->swap_map + offset;
- unsigned char *map_end = map + nr_pages;
+ unsigned long end = offset + nr_pages;
/* It should never free entries across different clusters */
VM_BUG_ON(ci != __swap_offset_to_cluster(si, offset + nr_pages - 1));
@@ -1530,10 +1520,8 @@ static void swap_slots_free(struct swap_info_struct *si,
VM_BUG_ON(ci->count < nr_pages);
ci->count -= nr_pages;
- do {
- VM_BUG_ON(!swap_is_last_ref(*map));
- *map = 0;
- } while (++map < map_end);
+ VM_BUG_ON(find_next_zero_bit(si->swap_map, end, offset) < end);
+ bitmap_clear(si->swap_map, offset, nr_pages);
swap_range_free(si, offset, nr_pages);
@@ -1744,9 +1732,7 @@ unsigned int count_swap_pages(int type, int free)
static bool swap_slot_allocated(struct swap_info_struct *si,
unsigned long offset)
{
- unsigned char count = READ_ONCE(si->swap_map[offset]);
-
- return count && swap_count(count) != SWAP_MAP_BAD;
+ return test_bit(offset, si->swap_map);
}
/*
@@ -2067,7 +2053,7 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
}
static void setup_swap_info(struct swap_info_struct *si, int prio,
- unsigned char *swap_map,
+ unsigned long *swap_map,
struct swap_cluster_info *cluster_info)
{
si->prio = prio;
@@ -2095,7 +2081,7 @@ static void _enable_swap_info(struct swap_info_struct *si)
}
static void enable_swap_info(struct swap_info_struct *si, int prio,
- unsigned char *swap_map,
+ unsigned long *swap_map,
struct swap_cluster_info *cluster_info)
{
spin_lock(&swap_lock);
@@ -2188,7 +2174,8 @@ static void flush_percpu_swap_cluster(struct swap_info_struct *si)
SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
struct swap_info_struct *p = NULL;
- unsigned char *swap_map;
+ unsigned long *swap_map;
+ unsigned long *bad_map;
struct swap_cluster_info *cluster_info;
struct file *swap_file, *victim;
struct address_space *mapping;
@@ -2283,6 +2270,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
p->swap_file = NULL;
swap_map = p->swap_map;
p->swap_map = NULL;
+ bad_map = p->bad_map;
+ p->bad_map = NULL;
maxpages = p->max;
cluster_info = p->cluster_info;
p->max = 0;
@@ -2293,7 +2282,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
mutex_unlock(&swapon_mutex);
kfree(p->global_cluster);
p->global_cluster = NULL;
- vfree(swap_map);
+ kvfree(swap_map);
+ kvfree(bad_map);
free_cluster_info(cluster_info, maxpages);
inode = mapping->host;
@@ -2641,18 +2631,20 @@ static unsigned long read_swap_header(struct swap_info_struct *si,
static int setup_swap_map(struct swap_info_struct *si,
union swap_header *swap_header,
- unsigned char *swap_map,
+ unsigned long *swap_map,
+ unsigned long *bad_map,
unsigned long maxpages)
{
unsigned long i;
- swap_map[0] = SWAP_MAP_BAD; /* omit header page */
+ set_bit(0, bad_map); /* omit header page */
+
for (i = 0; i < swap_header->info.nr_badpages; i++) {
unsigned int page_nr = swap_header->info.badpages[i];
if (page_nr == 0 || page_nr > swap_header->info.last_page)
return -EINVAL;
if (page_nr < maxpages) {
- swap_map[page_nr] = SWAP_MAP_BAD;
+ set_bit(page_nr, bad_map);
si->pages--;
}
}
@@ -2756,7 +2748,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
int nr_extents;
sector_t span;
unsigned long maxpages;
- unsigned char *swap_map = NULL;
+ unsigned long *swap_map = NULL, *bad_map = NULL;
struct swap_cluster_info *cluster_info = NULL;
struct folio *folio = NULL;
struct inode *inode = NULL;
@@ -2852,16 +2844,24 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
maxpages = si->max;
/* OK, set up the swap map and apply the bad block list */
- swap_map = vzalloc(maxpages);
+ swap_map = kvcalloc(BITS_TO_LONGS(maxpages), sizeof(long), GFP_KERNEL);
if (!swap_map) {
error = -ENOMEM;
goto bad_swap_unlock_inode;
}
- error = setup_swap_map(si, swap_header, swap_map, maxpages);
+ bad_map = kvcalloc(BITS_TO_LONGS(maxpages), sizeof(long), GFP_KERNEL);
+ if (!bad_map) {
+ error = -ENOMEM;
+ goto bad_swap_unlock_inode;
+ }
+
+ error = setup_swap_map(si, swap_header, swap_map, bad_map, maxpages);
if (error)
goto bad_swap_unlock_inode;
+ si->bad_map = bad_map;
+
if (si->bdev && bdev_stable_writes(si->bdev))
si->flags |= SWP_STABLE_WRITES;
@@ -2955,7 +2955,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
si->swap_file = NULL;
si->flags = 0;
spin_unlock(&swap_lock);
- vfree(swap_map);
+ if (swap_map)
+ kvfree(swap_map);
+ if (bad_map)
+ kvfree(bad_map);
if (cluster_info)
free_cluster_info(cluster_info, maxpages);
if (inced_nr_rotate_swap)
--
2.52.0
next prev parent reply other threads:[~2026-03-20 19:28 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-20 19:27 [PATCH v5 00/21] Virtual Swap Space Nhat Pham
2026-03-20 19:27 ` [PATCH v5 01/21] mm/swap: decouple swap cache from physical swap infrastructure Nhat Pham
2026-03-20 19:27 ` [PATCH v5 02/21] swap: rearrange the swap header file Nhat Pham
2026-03-20 19:27 ` [PATCH v5 03/21] mm: swap: add an abstract API for locking out swapoff Nhat Pham
2026-03-20 19:27 ` [PATCH v5 04/21] zswap: add new helpers for zswap entry operations Nhat Pham
2026-03-20 19:27 ` [PATCH v5 05/21] mm/swap: add a new function to check if a swap entry is in swap cached Nhat Pham
2026-03-20 19:27 ` [PATCH v5 06/21] mm: swap: add a separate type for physical swap slots Nhat Pham
2026-03-20 19:27 ` [PATCH v5 07/21] mm: create scaffolds for the new virtual swap implementation Nhat Pham
2026-03-20 19:27 ` [PATCH v5 08/21] zswap: prepare zswap for swap virtualization Nhat Pham
2026-03-20 19:27 ` [PATCH v5 09/21] mm: swap: allocate a virtual swap slot for each swapped out page Nhat Pham
2026-03-20 19:27 ` [PATCH v5 10/21] swap: move swap cache to virtual swap descriptor Nhat Pham
2026-03-20 19:27 ` [PATCH v5 11/21] zswap: move zswap entry management to the " Nhat Pham
2026-03-20 19:27 ` [PATCH v5 12/21] swap: implement the swap_cgroup API using virtual swap Nhat Pham
2026-03-20 19:27 ` [PATCH v5 13/21] swap: manage swap entry lifecycle at the virtual swap layer Nhat Pham
2026-03-20 19:27 ` [PATCH v5 14/21] mm: swap: decouple virtual swap slot from backing store Nhat Pham
2026-03-20 19:27 ` [PATCH v5 15/21] zswap: do not start zswap shrinker if there is no physical swap slots Nhat Pham
2026-03-20 19:27 ` [PATCH v5 16/21] swap: do not unnecesarily pin readahead swap entries Nhat Pham
2026-03-20 19:27 ` [PATCH v5 17/21] swapfile: remove zeromap bitmap Nhat Pham
2026-03-20 19:27 ` [PATCH v5 18/21] memcg: swap: only charge physical swap slots Nhat Pham
2026-03-20 19:27 ` [PATCH v5 19/21] swap: simplify swapoff using virtual swap Nhat Pham
2026-03-20 19:27 ` Nhat Pham [this message]
2026-03-20 19:27 ` [PATCH v5 21/21] vswap: batch contiguous vswap free calls Nhat Pham
2026-03-21 18:22 ` [PATCH v5 00/21] Virtual Swap Space Andrew Morton
2026-03-22 2:18 ` Roman Gushchin
2026-03-23 10:08 ` Kairui Song
2026-03-23 15:32 ` Nhat Pham
2026-03-23 16:40 ` Kairui Song
2026-03-23 20:05 ` Nhat Pham
2026-04-14 17:23 ` Nhat Pham
2026-04-14 17:32 ` Nhat Pham
2026-04-16 18:46 ` Kairui Song
2026-03-25 18:53 ` YoungJun Park
2026-04-12 1:03 ` Nhat Pham
2026-04-14 3:09 ` YoungJun Park
2026-04-20 16:02 ` Nhat Pham
2026-03-24 13:19 ` Askar Safin
2026-03-24 17:23 ` Nhat Pham
2026-03-25 2:35 ` Askar Safin
2026-03-25 18:36 ` YoungJun Park
2026-04-12 1:40 ` Nhat Pham
2026-04-14 2:50 ` YoungJun Park
2026-04-14 3:28 ` Kairui Song
2026-04-14 16:35 ` Nhat Pham
2026-04-14 7:52 ` Christoph Hellwig
2026-04-22 0:26 ` Yosry Ahmed
2026-04-22 2:18 ` Kairui Song
2026-04-22 20:27 ` Yosry Ahmed
2026-04-23 6:16 ` Kairui Song
2026-04-23 20:47 ` Yosry Ahmed
2026-04-24 4:03 ` Gregory Price
2026-04-24 4:15 ` Kairui Song
2026-04-24 4:25 ` Kairui Song
2026-04-24 17:28 ` Nhat Pham
2026-04-24 18:04 ` Kairui Song
2026-04-24 18:08 ` Yosry Ahmed
2026-04-24 18:58 ` Kairui Song
2026-04-24 19:12 ` Yosry Ahmed
2026-04-24 19:51 ` Kairui Song
2026-04-27 18:23 ` Yosry Ahmed
2026-04-28 18:46 ` Kairui Song
2026-04-28 23:53 ` Yosry Ahmed
2026-05-01 13:42 ` Nhat Pham
2026-05-01 14:16 ` Nhat Pham
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260320192735.748051-21-nphamcs@gmail.com \
--to=nphamcs@gmail.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=axelrasmussen@google.com \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=bhe@redhat.com \
--cc=byungchul@sk.com \
--cc=cgroups@vger.kernel.org \
--cc=chengming.zhou@linux.dev \
--cc=chrisl@kernel.org \
--cc=corbet@lwn.net \
--cc=david@kernel.org \
--cc=dev.jain@arm.com \
--cc=gourry@gourry.net \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=jannh@google.com \
--cc=joshua.hahnjy@gmail.com \
--cc=kasong@tencent.com \
--cc=kernel-team@meta.com \
--cc=lance.yang@linux.dev \
--cc=lenb@kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-pm@vger.kernel.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=matthew.brost@intel.com \
--cc=mhocko@suse.com \
--cc=muchun.song@linux.dev \
--cc=npache@redhat.com \
--cc=pavel@kernel.org \
--cc=peterx@redhat.com \
--cc=peterz@infradead.org \
--cc=pfalcato@suse.de \
--cc=rafael@kernel.org \
--cc=rakie.kim@sk.com \
--cc=riel@surriel.com \
--cc=roman.gushchin@linux.dev \
--cc=rppt@kernel.org \
--cc=ryan.roberts@arm.com \
--cc=shakeel.butt@linux.dev \
--cc=shikemeng@huaweicloud.com \
--cc=surenb@google.com \
--cc=tglx@kernel.org \
--cc=vbabka@suse.cz \
--cc=weixugc@google.com \
--cc=ying.huang@linux.alibaba.com \
--cc=yosry.ahmed@linux.dev \
--cc=yuanchu@google.com \
--cc=zhengqi.arch@bytedance.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.