From: Baoquan He <baoquan.he@linux.dev>
To: Christoph Hellwig <hch@lst.de>
Cc: akpm@linux-foundation.org, chrisl@kernel.org,
usama.arif@linux.dev, kasong@tencent.com, nphamcs@gmail.com,
shikemeng@huaweicloud.com, youngjun.park@lge.com,
linux-mm@kvack.org
Subject: Re: [PATCH 4/8] mm/swap: also use struct swap_iocb for block I/O
Date: Thu, 4 Jun 2026 19:37:09 +0800 [thread overview]
Message-ID: <aiFjZXQ73awVxUFd@MiWiFi-R3L-srv> (raw)
In-Reply-To: <20260601113449.3464734-5-hch@lst.de>
On 06/01/26 at 01:34pm, Christoph Hellwig wrote:
> Block I/O benefits from batching just as much as remote file systems.
> Extend struct swap_iocb to support building a bio on the fly as well,
> and rewrite the block based swap code for it. This especially benefits
> submit_bio based drivers that do not have the block plugging available,
> but also saves allocating extra bios for blk-mq drivers.
>
> Note that the block based swap code now uses the same memcg-based
> check previously added for file system based swap as well.
I would add the words below to the commit log to ease patch reviewing/studying,
though this may just be personal style.
What it does:
- Adds bio to a union with kiocb in struct swap_iocb, so the same sio
can drive either FS or block I/O;
- Removes 6 functions (swap_writepage_bdev_sync, swap_writepage_bdev_async,
swap_read_folio_bdev_sync, swap_read_folio_bdev_async, swap_writepage_fs,
swap_read_folio_fs) and replaces them with a unified path
(swap_add_page + swap_can_merge + submit path);
- Adds a sis pointer to swap_io_ctx for direct access in the submit path;
- sio_pool_init() is now called unconditionally (not just for FS swap),
which is necessary since bdev swap now also allocates from the sio pool;
swap_add_page() should be renamed to swap_add_folio().
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> mm/page_io.c | 526 ++++++++++++++++++++++++--------------------------
> mm/swap.h | 1 +
> mm/swapfile.c | 9 +-
> 3 files changed, 252 insertions(+), 284 deletions(-)
>
> diff --git a/mm/page_io.c b/mm/page_io.c
> index 0bf035dc1170..22c751fe03c0 100644
> --- a/mm/page_io.c
> +++ b/mm/page_io.c
> @@ -28,54 +28,6 @@
> #include "swap.h"
> #include "swap_table.h"
>
> -static void __end_swap_bio_write(struct bio *bio)
> -{
> - struct folio *folio = bio_first_folio_all(bio);
> -
> - if (bio->bi_status) {
> - /*
> - * We failed to write the page out to swap-space.
> - * Re-dirty the page in order to avoid it being reclaimed.
> - * Also print a dire warning that things will go BAD (tm)
> - * very quickly.
> - *
> - * Also clear PG_reclaim to avoid folio_rotate_reclaimable()
> - */
> - folio_mark_dirty(folio);
> - pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
> - MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
> - (unsigned long long)bio->bi_iter.bi_sector);
> - folio_clear_reclaim(folio);
> - }
> - folio_end_writeback(folio);
> -}
> -
> -static void end_swap_bio_write(struct bio *bio)
> -{
> - __end_swap_bio_write(bio);
> - bio_put(bio);
> -}
> -
> -static void __end_swap_bio_read(struct bio *bio)
> -{
> - struct folio *folio = bio_first_folio_all(bio);
> -
> - if (bio->bi_status) {
> - pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
> - MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
> - (unsigned long long)bio->bi_iter.bi_sector);
> - } else {
> - folio_mark_uptodate(folio);
> - }
> - folio_unlock(folio);
> -}
> -
> -static void end_swap_bio_read(struct bio *bio)
> -{
> - __end_swap_bio_read(bio);
> - bio_put(bio);
> -}
> -
> int generic_swapfile_activate(struct swap_info_struct *sis,
> struct file *swap_file,
> sector_t *span)
> @@ -316,26 +268,47 @@ static inline void count_swpout_vm_event(struct folio *folio)
> }
>
> #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
> -static void bio_associate_blkg_from_page(struct bio *bio, struct folio *folio)
> +static struct cgroup_subsys_state *folio_memcg_blkg_css(struct folio *folio)
> +{
> + return cgroup_e_css(folio_memcg(folio)->css.cgroup, &io_cgrp_subsys);
> +}
> +
> +static bool folio_blkg_can_merge(struct folio *folio, struct folio *prev_folio)
> {
> - struct cgroup_subsys_state *css;
> - struct mem_cgroup *memcg;
> + if (!folio_memcg_charged(folio) || !folio_memcg_charged(prev_folio))
> + return true;
> +
> + rcu_read_lock();
> + if (folio_memcg_blkg_css(folio) != folio_memcg_blkg_css(prev_folio)) {
> + rcu_read_unlock();
> + return false;
> + }
> + rcu_read_unlock();
> +
> + return true;
> +}
>
> +static void bio_associate_blkg_from_page(struct bio *bio, struct folio *folio)
> +{
> if (!folio_memcg_charged(folio))
> return;
> -
> rcu_read_lock();
> - memcg = folio_memcg(folio);
> - css = cgroup_e_css(memcg->css.cgroup, &io_cgrp_subsys);
> - bio_associate_blkg_from_css(bio, css);
> + bio_associate_blkg_from_css(bio, folio_memcg_blkg_css(folio));
> rcu_read_unlock();
> }
> #else
> +static bool folio_blkg_can_merge(struct folio *folio, struct folio *prev_folio)
> +{
> + return true;
> +}
> #define bio_associate_blkg_from_page(bio, folio) do { } while (0)
> #endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
>
> struct swap_iocb {
> - struct kiocb iocb;
> + union {
> + struct kiocb iocb;
> + struct bio bio;
> + };
> struct bio_vec bvecs[SWAP_CLUSTER_MAX];
> int nr_bvecs;
> int len;
> @@ -355,171 +328,70 @@ int sio_pool_init(void)
> return 0;
> }
>
> -static void sio_write_complete(struct kiocb *iocb, long ret)
> +static bool swap_can_merge(struct swap_io_ctx *ctx, struct folio *folio,
> + int rw)
> {
> - struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
> - struct page *page = sio->bvecs[0].bv_page;
> - int p;
> + struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> + struct bio_vec *last_bv = &ctx->sio->bvecs[ctx->sio->nr_bvecs - 1];
> + struct folio *prev_folio = page_folio(last_bv->bv_page);
> + size_t prev_folio_size = folio_size(prev_folio);
>
> - if (ret != sio->len) {
> - /*
> - * In the case of swap-over-nfs, this can be a
> - * temporary failure if the system has limited
> - * memory for allocating transmit buffers.
> - * Mark the page dirty and avoid
> - * folio_rotate_reclaimable but rate-limit the
> - * messages.
> - */
> - pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
> - ret, swap_dev_pos(page_swap_entry(page)));
> - for (p = 0; p < sio->nr_bvecs; p++) {
> - page = sio->bvecs[p].bv_page;
> - set_page_dirty(page);
> - ClearPageReclaim(page);
> - }
> - }
> + if (ctx->sis != sis)
> + return false;
>
> - for (p = 0; p < sio->nr_bvecs; p++)
> - end_page_writeback(sio->bvecs[p].bv_page);
> + if (sis->flags & SWP_FS_OPS) {
> + if (swap_dev_pos(folio->swap) !=
> + swap_dev_pos(prev_folio->swap) + prev_folio_size)
> + return false;
> + } else {
> + if (swap_folio_sector(folio) !=
> + swap_folio_sector(prev_folio) +
> + (prev_folio_size >> SECTOR_SHIFT))
> + return false;
> + if (rw == WRITE && !folio_blkg_can_merge(folio, prev_folio))
> + return false;
> + }
>
> - mempool_free(sio, sio_pool);
> + return true;
> }
>
> -static void swap_writepage_fs(struct swap_io_ctx *ctx, struct folio *folio)
> +static void swap_add_page(struct swap_io_ctx *ctx, struct folio *folio, int rw)
> {
> - struct swap_iocb *sio = ctx->sio;
> struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> - struct file *swap_file = sis->swap_file;
> - loff_t pos = swap_dev_pos(folio->swap);
> + struct swap_iocb *sio = ctx->sio;
>
> - count_swpout_vm_event(folio);
> - folio_start_writeback(folio);
> - folio_unlock(folio);
> - if (sio) {
> - if (sio->iocb.ki_filp != swap_file ||
> - sio->iocb.ki_pos + sio->len != pos) {
> + if (sio && !swap_can_merge(ctx, folio, rw)) {
> + if (rw == WRITE)
> swap_write_submit(ctx);
> - sio = NULL;
> - }
> + else
> + swap_read_submit(ctx);
> + sio = ctx->sio;
> }
> +
> if (!sio) {
> - sio = mempool_alloc(sio_pool, GFP_NOIO);
> - init_sync_kiocb(&sio->iocb, swap_file);
> - sio->iocb.ki_complete = sio_write_complete;
> - sio->iocb.ki_pos = pos;
> + ctx->sis = sis;
> + ctx->sio = sio = mempool_alloc(sio_pool, GFP_NOIO);
> sio->nr_bvecs = 0;
> sio->len = 0;
> }
> bvec_set_folio(&sio->bvecs[sio->nr_bvecs], folio, folio_size(folio), 0);
> sio->len += folio_size(folio);
> - sio->nr_bvecs += 1;
> - if (sio->nr_bvecs == ARRAY_SIZE(sio->bvecs)) {
> - swap_write_submit(ctx);
> - sio = NULL;
> + if (++sio->nr_bvecs == ARRAY_SIZE(sio->bvecs)) {
> + if (rw == WRITE)
> + swap_write_submit(ctx);
> + else
> + swap_read_submit(ctx);
> }
> - ctx->sio = sio;
> -}
> -
> -static void swap_writepage_bdev_sync(struct folio *folio,
> - struct swap_info_struct *sis)
> -{
> - struct bio_vec bv;
> - struct bio bio;
> -
> - bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_WRITE | REQ_SWAP);
> - bio.bi_iter.bi_sector = swap_folio_sector(folio);
> - bio_add_folio_nofail(&bio, folio, folio_size(folio), 0);
> -
> - bio_associate_blkg_from_page(&bio, folio);
> - count_swpout_vm_event(folio);
> -
> - folio_start_writeback(folio);
> - folio_unlock(folio);
> -
> - submit_bio_wait(&bio);
> - __end_swap_bio_write(&bio);
> }
>
> -static void swap_writepage_bdev_async(struct folio *folio,
> - struct swap_info_struct *sis)
> +void __swap_writepage(struct swap_io_ctx *ctx, struct folio *folio)
> {
> - struct bio *bio;
> -
> - bio = bio_alloc(sis->bdev, 1, REQ_OP_WRITE | REQ_SWAP, GFP_NOIO);
> - bio->bi_iter.bi_sector = swap_folio_sector(folio);
> - bio->bi_end_io = end_swap_bio_write;
> - bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
> + VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
>
> - bio_associate_blkg_from_page(bio, folio);
> count_swpout_vm_event(folio);
> folio_start_writeback(folio);
> folio_unlock(folio);
> - submit_bio(bio);
> -}
> -
> -void __swap_writepage(struct swap_io_ctx *ctx, struct folio *folio)
> -{
> - struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> -
> - VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
> - /*
> - * ->flags can be updated non-atomically,
> - * but that will never affect SWP_FS_OPS, so the data_race
> - * is safe.
> - */
> - if (data_race(sis->flags & SWP_FS_OPS))
> - swap_writepage_fs(ctx, folio);
> - /*
> - * ->flags can be updated non-atomically,
> - * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
> - * is safe.
> - */
> - else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
> - swap_writepage_bdev_sync(folio, sis);
> - else
> - swap_writepage_bdev_async(folio, sis);
> -}
> -
> -void swap_write_submit(struct swap_io_ctx *ctx)
> -{
> - struct swap_iocb *sio = ctx->sio;
> - struct iov_iter from;
> - int ret;
> -
> - if (!sio)
> - return;
> -
> - iov_iter_bvec(&from, ITER_SOURCE, sio->bvecs, sio->nr_bvecs, sio->len);
> - ret = sio->iocb.ki_filp->f_mapping->a_ops->swap_rw(&sio->iocb, &from);
> - if (ret != -EIOCBQUEUED)
> - sio_write_complete(&sio->iocb, ret);
> - ctx->sio = NULL;
> -}
> -
> -static void sio_read_complete(struct kiocb *iocb, long ret)
> -{
> - struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
> - int p;
> -
> - if (ret == sio->len) {
> - for (p = 0; p < sio->nr_bvecs; p++) {
> - struct folio *folio = page_folio(sio->bvecs[p].bv_page);
> -
> - count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
> - count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
> - folio_mark_uptodate(folio);
> - folio_unlock(folio);
> - }
> - count_vm_events(PSWPIN, sio->len >> PAGE_SHIFT);
> - } else {
> - for (p = 0; p < sio->nr_bvecs; p++) {
> - struct folio *folio = page_folio(sio->bvecs[p].bv_page);
> -
> - folio_unlock(folio);
> - }
> - pr_alert_ratelimited("Read-error on swap-device\n");
> - }
> - mempool_free(sio, sio_pool);
> + swap_add_page(ctx, folio, WRITE);
> }
>
> /*
> @@ -585,74 +457,6 @@ static bool swap_read_folio_zeromap(struct folio *folio)
> return true;
> }
>
> -static void swap_read_folio_fs(struct swap_io_ctx *ctx, struct folio *folio)
> -{
> - struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> - struct swap_iocb *sio = ctx->sio;
> - loff_t pos = swap_dev_pos(folio->swap);
> -
> - if (sio) {
> - if (sio->iocb.ki_filp != sis->swap_file ||
> - sio->iocb.ki_pos + sio->len != pos) {
> - swap_read_submit(ctx);
> - sio = NULL;
> - }
> - }
> - if (!sio) {
> - sio = mempool_alloc(sio_pool, GFP_KERNEL);
> - init_sync_kiocb(&sio->iocb, sis->swap_file);
> - sio->iocb.ki_pos = pos;
> - sio->iocb.ki_complete = sio_read_complete;
> - sio->nr_bvecs = 0;
> - sio->len = 0;
> - }
> - bvec_set_folio(&sio->bvecs[sio->nr_bvecs], folio, folio_size(folio), 0);
> - sio->len += folio_size(folio);
> - sio->nr_bvecs += 1;
> - if (sio->nr_bvecs == ARRAY_SIZE(sio->bvecs)) {
> - swap_read_submit(ctx);
> - sio = NULL;
> - }
> - ctx->sio = sio;
> -}
> -
> -static void swap_read_folio_bdev_sync(struct folio *folio,
> - struct swap_info_struct *sis)
> -{
> - struct bio_vec bv;
> - struct bio bio;
> -
> - bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ);
> - bio.bi_iter.bi_sector = swap_folio_sector(folio);
> - bio_add_folio_nofail(&bio, folio, folio_size(folio), 0);
> - /*
> - * Keep this task valid during swap readpage because the oom killer may
> - * attempt to access it in the page fault retry time check.
> - */
> - get_task_struct(current);
> - count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
> - count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
> - count_vm_events(PSWPIN, folio_nr_pages(folio));
> - submit_bio_wait(&bio);
> - __end_swap_bio_read(&bio);
> - put_task_struct(current);
> -}
> -
> -static void swap_read_folio_bdev_async(struct folio *folio,
> - struct swap_info_struct *sis)
> -{
> - struct bio *bio;
> -
> - bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
> - bio->bi_iter.bi_sector = swap_folio_sector(folio);
> - bio->bi_end_io = end_swap_bio_read;
> - bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
> - count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
> - count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
> - count_vm_events(PSWPIN, folio_nr_pages(folio));
> - submit_bio(bio);
> -}
> -
> void swap_read_folio(struct swap_io_ctx *ctx, struct folio *folio)
> {
> struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> @@ -686,14 +490,7 @@ void swap_read_folio(struct swap_io_ctx *ctx, struct folio *folio)
>
> /* We have to read from slower devices. Increase zswap protection. */
> zswap_folio_swapin(folio);
> -
> - if (data_race(sis->flags & SWP_FS_OPS)) {
> - swap_read_folio_fs(ctx, folio);
> - } else if (synchronous) {
> - swap_read_folio_bdev_sync(folio, sis);
> - } else {
> - swap_read_folio_bdev_async(folio, sis);
> - }
> + swap_add_page(ctx, folio, READ);
>
> finish:
> if (workingset) {
> @@ -703,18 +500,189 @@ void swap_read_folio(struct swap_io_ctx *ctx, struct folio *folio)
> delayacct_swapin_end();
> }
>
> -void swap_read_submit(struct swap_io_ctx *ctx)
> +static void swap_write_end(struct swap_iocb *sio, bool failed)
> +{
> + int p;
> +
> + for (p = 0; p < sio->nr_bvecs; p++) {
> + struct page *page = sio->bvecs[p].bv_page;
> +
> + if (failed) {
> + set_page_dirty(page);
> + ClearPageReclaim(page);
> + }
> + end_page_writeback(page);
> + }
> + mempool_free(sio, sio_pool);
> +}
> +
> +static void swap_fs_write_complete(struct kiocb *iocb, long ret)
> +{
> + struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
> + bool failed = ret != sio->len;
> +
> + if (failed) {
> + struct page *page = sio->bvecs[0].bv_page;
> +
> + /*
> + * In the case of swap-over-nfs, this can be a temporary failure
> + * if the system has limited memory for allocating transmit
> + * buffers. Mark the page dirty and avoid
> + * folio_rotate_reclaimable but rate-limit the messages.
> + */
> + pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
> + ret, swap_dev_pos(page_swap_entry(page)));
> + }
> +
> + swap_write_end(sio, failed);
> +}
> +
> +static void end_swap_bio_write(struct bio *bio)
> +{
> + struct swap_iocb *sio = container_of(bio, struct swap_iocb, bio);
> + bool failed = !!bio->bi_status;
> +
> + if (failed)
> + pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
> + MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
> + (unsigned long long)bio->bi_iter.bi_sector);
> + bio_uninit(bio);
> + swap_write_end(sio, failed);
> +}
> +
> +static void swap_read_end(struct swap_iocb *sio, bool failed)
> +{
> + int p;
> +
> + for (p = 0; p < sio->nr_bvecs; p++) {
> + struct folio *folio = page_folio(sio->bvecs[p].bv_page);
> +
> + if (!failed) {
> + count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
> + count_memcg_folio_events(folio, PSWPIN,
> + folio_nr_pages(folio));
> + folio_mark_uptodate(folio);
> + }
> + folio_unlock(folio);
> + }
> +
> + if (!failed)
> + count_vm_events(PSWPIN, sio->len >> PAGE_SHIFT);
> +
> + mempool_free(sio, sio_pool);
> +}
> +
> +static void swap_fs_read_complete(struct kiocb *iocb, long ret)
> +{
> + struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
> + bool failed = ret != sio->len;
> +
> + if (failed)
> + pr_alert_ratelimited("Read-error on swap-device\n");
> + swap_read_end(sio, failed);
> +}
> +
> +static void swap_bio_read_end_io(struct bio *bio)
> +{
> + struct swap_iocb *sio = container_of(bio, struct swap_iocb, bio);
> + bool failed = !!bio->bi_status;
> +
> + if (failed)
> + pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
> + MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
> + (unsigned long long)bio->bi_iter.bi_sector);
> + bio_uninit(bio);
> + swap_read_end(sio, failed);
> +}
> +
> +static void swap_bdev_submit_write(struct swap_io_ctx *ctx)
> {
> struct swap_iocb *sio = ctx->sio;
> - struct iov_iter from;
> + struct bio *bio = &sio->bio;
> +
> + bio_init(bio, ctx->sis->bdev, sio->bvecs, ARRAY_SIZE(sio->bvecs),
> + REQ_OP_WRITE | REQ_SWAP);
> + bio->bi_iter.bi_size = sio->len;
> + bio->bi_iter.bi_sector = swap_folio_sector(bio_first_folio_all(bio));
> + bio_associate_blkg_from_page(bio, bio_first_folio_all(bio));
> +
> + if (ctx->sis->flags & SWP_SYNCHRONOUS_IO) {
> + submit_bio_wait(bio);
> + end_swap_bio_write(bio);
> + } else {
> + bio->bi_end_io = end_swap_bio_write;
> + submit_bio(bio);
> + }
> +}
> +
> +static void swap_bdev_submit_read(struct swap_io_ctx *ctx)
> +{
> + struct swap_iocb *sio = ctx->sio;
> + struct bio *bio = &sio->bio;
> +
> + bio_init(bio, ctx->sis->bdev, sio->bvecs, ARRAY_SIZE(sio->bvecs),
> + REQ_OP_READ);
> + bio->bi_iter.bi_size = sio->len;
> + bio->bi_iter.bi_sector = swap_folio_sector(bio_first_folio_all(bio));
> +
> + if (ctx->sis->flags & SWP_SYNCHRONOUS_IO) {
> + /*
> + * Keep this task valid during swap readpage because the oom
> + * killer may attempt to access it in the page fault retry
> + * time check.
> + */
> + get_task_struct(current);
> + submit_bio_wait(bio);
> + swap_bio_read_end_io(bio);
> + put_task_struct(current);
> + } else {
> + bio->bi_end_io = swap_bio_read_end_io;
> + submit_bio(bio);
> + }
> +}
> +
> +static void swap_fs_submit(struct swap_io_ctx *ctx, int rw)
> +{
> + struct swap_iocb *sio = ctx->sio;
> + struct iov_iter iter;
> int ret;
>
> - if (!sio)
> - return;
> + init_sync_kiocb(&sio->iocb, ctx->sis->swap_file);
> + sio->iocb.ki_pos = swap_dev_pos(page_folio(sio->bvecs[0].bv_page)->swap);
> + if (rw == WRITE)
> + sio->iocb.ki_complete = swap_fs_write_complete;
> + else
> + sio->iocb.ki_complete = swap_fs_read_complete;
>
> - iov_iter_bvec(&from, ITER_DEST, sio->bvecs, sio->nr_bvecs, sio->len);
> - ret = sio->iocb.ki_filp->f_mapping->a_ops->swap_rw(&sio->iocb, &from);
> + iov_iter_bvec(&iter, rw == WRITE ? ITER_SOURCE : ITER_DEST,
> + sio->bvecs, sio->nr_bvecs, sio->len);
> + ret = sio->iocb.ki_filp->f_mapping->a_ops->swap_rw(&sio->iocb, &iter);
> if (ret != -EIOCBQUEUED)
> - sio_read_complete(&sio->iocb, ret);
> + sio->iocb.ki_complete(&sio->iocb, ret);
> +}
> +
> +void swap_write_submit(struct swap_io_ctx *ctx)
> +{
> + if (!ctx->sio)
> + return;
> +
> + if (ctx->sis->flags & SWP_FS_OPS)
> + swap_fs_submit(ctx, WRITE);
> + else
> + swap_bdev_submit_write(ctx);
> + ctx->sio = NULL;
> + ctx->sis = NULL;
> +}
> +
> +void swap_read_submit(struct swap_io_ctx *ctx)
> +{
> + if (!ctx->sio)
> + return;
> +
> + if (ctx->sis->flags & SWP_FS_OPS)
> + swap_fs_submit(ctx, READ);
> + else
> + swap_bdev_submit_read(ctx);
> ctx->sio = NULL;
> + ctx->sis = NULL;
> }
> diff --git a/mm/swap.h b/mm/swap.h
> index 79d66272dfd4..b6ba80c2afb0 100644
> --- a/mm/swap.h
> +++ b/mm/swap.h
> @@ -81,6 +81,7 @@ enum swap_cluster_flags {
>
> struct swap_io_ctx {
> struct swap_iocb *sio;
> + struct swap_info_struct *sis;
> };
>
> #ifdef CONFIG_SWAP
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 615d90867111..2372f7cc4653 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -2842,6 +2842,10 @@ static int setup_swap_extents(struct swap_info_struct *sis,
> struct inode *inode = mapping->host;
> int ret;
>
> + ret = sio_pool_init();
> + if (ret)
> + return ret;
> +
> if (S_ISBLK(inode->i_mode)) {
> ret = add_swap_extent(sis, 0, sis->max, 0);
> *span = sis->pages;
> @@ -2853,11 +2857,6 @@ static int setup_swap_extents(struct swap_info_struct *sis,
> if (ret < 0)
> return ret;
> sis->flags |= SWP_ACTIVATED;
> - if ((sis->flags & SWP_FS_OPS) &&
> - sio_pool_init() != 0) {
> - destroy_swap_extents(sis, swap_file);
> - return -ENOMEM;
> - }
> return ret;
> }
>
> --
> 2.53.0
>
next prev parent reply other threads:[~2026-06-04 11:37 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-01 11:34 better block swap batching and a different take on swap_ops v2 Christoph Hellwig
2026-06-01 11:34 ` [PATCH 1/8] shmem: provide a shmem_write_folio wrapper Christoph Hellwig
2026-06-04 9:43 ` Baoquan He
2026-06-01 11:34 ` [PATCH 2/8] mm: merge writeout into pageout Christoph Hellwig
2026-06-04 9:44 ` Baoquan He
2026-06-01 11:34 ` [PATCH 3/8] mm/swap: introduce struct swap_io_ctx Christoph Hellwig
2026-06-04 10:58 ` Baoquan He
2026-06-01 11:34 ` [PATCH 4/8] mm/swap: also use struct swap_iocb for block I/O Christoph Hellwig
2026-06-04 10:59 ` Baoquan He
2026-06-04 11:37 ` Baoquan He [this message]
2026-06-01 11:34 ` [PATCH 5/8] mm/swap: remove count_swpout_vm_event Christoph Hellwig
2026-06-04 11:37 ` Baoquan He
2026-06-01 11:34 ` [PATCH 6/8] mm/swap: use swap_ops to register swap device's methods Christoph Hellwig
2026-06-01 11:34 ` [PATCH 7/8] mm/swap: remove SWP_FS_OPS Christoph Hellwig
2026-06-01 11:34 ` [PATCH 8/8] mm/vmstat: add NRSWP{IN,OUT} counters Christoph Hellwig
2026-06-01 13:29 ` better block swap batching and a different take on swap_ops v2 Baoquan He
2026-06-01 14:50 ` Christoph Hellwig
2026-06-01 15:17 ` Baoquan He
2026-06-01 15:25 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aiFjZXQ73awVxUFd@MiWiFi-R3L-srv \
--to=baoquan.he@linux.dev \
--cc=akpm@linux-foundation.org \
--cc=chrisl@kernel.org \
--cc=hch@lst.de \
--cc=kasong@tencent.com \
--cc=linux-mm@kvack.org \
--cc=nphamcs@gmail.com \
--cc=shikemeng@huaweicloud.com \
--cc=usama.arif@linux.dev \
--cc=youngjun.park@lge.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox