Re: [PATCH v6 15/16] mm: zswap: Compress batching with Intel IAA in zswap_store() of large folios.

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Yosry Ahmed <yosry.ahmed@linux.dev>
To: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	hannes@cmpxchg.org, nphamcs@gmail.com, chengming.zhou@linux.dev,
	usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com,
	akpm@linux-foundation.org, linux-crypto@vger.kernel.org,
	herbert@gondor.apana.org.au, davem@davemloft.net,
	clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com,
	surenb@google.com, kristen.c.accardi@intel.com,
	wajdi.k.feghali@intel.com, vinodh.gopal@intel.com
Subject: Re: [PATCH v6 15/16] mm: zswap: Compress batching with Intel IAA in zswap_store() of large folios.
Date: Thu, 6 Feb 2025 19:10:33 +0000	[thread overview]
Message-ID: <Z6UJKTCkffZ93us5@google.com> (raw)
In-Reply-To: <20250206072102.29045-16-kanchana.p.sridhar@intel.com>

On Wed, Feb 05, 2025 at 11:21:01PM -0800, Kanchana P Sridhar wrote:
> zswap_compress_folio() is modified to detect if the pool's acomp_ctx has
> more than one "nr_reqs", which will be the case if the cpu onlining code
> has allocated multiple batching resources in the acomp_ctx. If so, it means
> compress batching can be used with a batch-size of "acomp_ctx->nr_reqs".
> 
> If compress batching can be used, zswap_compress_folio() will invoke the
> newly added zswap_batch_compress() procedure to compress and store the
> folio in batches of "acomp_ctx->nr_reqs" pages.
> 
> With Intel IAA, the iaa_crypto driver will compress each batch of pages in
> parallel in hardware.
> 
> Hence, zswap_batch_compress() does the same computes for a batch, as
> zswap_compress() does for a page; and returns true if the batch was
> successfully compressed/stored, and false otherwise.
> 
> If the pool does not support compress batching, or the folio has only one
> page, zswap_compress_folio() calls zswap_compress() for each individual
> page in the folio, as before.
> 
> Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
> ---
>  mm/zswap.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 113 insertions(+), 9 deletions(-)
> 
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 6563d12e907b..f1cba77eda62 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -985,10 +985,11 @@ static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
>  	mutex_unlock(&acomp_ctx->mutex);
>  }
>  
> +/* The per-cpu @acomp_ctx mutex should be locked/unlocked in the caller. */

Please use lockdep assertions rather than comments for internal locking rules.

>  static bool zswap_compress(struct page *page, struct zswap_entry *entry,
> -			   struct zswap_pool *pool)
> +			   struct zswap_pool *pool,
> +			   struct crypto_acomp_ctx *acomp_ctx)
>  {
> -	struct crypto_acomp_ctx *acomp_ctx;
>  	struct scatterlist input, output;
>  	int comp_ret = 0, alloc_ret = 0;
>  	unsigned int dlen = PAGE_SIZE;
> @@ -998,7 +999,6 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
>  	gfp_t gfp;
>  	u8 *dst;
>  
> -	acomp_ctx = acomp_ctx_get_cpu_lock(pool);
>  	dst = acomp_ctx->buffers[0];
>  	sg_init_table(&input, 1);
>  	sg_set_page(&input, page, PAGE_SIZE, 0);
> @@ -1051,7 +1051,6 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
>  	else if (alloc_ret)
>  		zswap_reject_alloc_fail++;
>  
> -	acomp_ctx_put_unlock(acomp_ctx);
>  	return comp_ret == 0 && alloc_ret == 0;
>  }
>  
> @@ -1509,20 +1508,125 @@ static void shrink_worker(struct work_struct *w)
>  * main API
>  **********************************/
>  
> +/* The per-cpu @acomp_ctx mutex should be locked/unlocked in the caller. */
> +static bool zswap_batch_compress(struct folio *folio,
> +				 long index,
> +				 unsigned int batch_size,
> +				 struct zswap_entry *entries[],
> +				 struct zswap_pool *pool,
> +				 struct crypto_acomp_ctx *acomp_ctx)
> +{
> +	int comp_errors[ZSWAP_MAX_BATCH_SIZE] = { 0 };
> +	unsigned int dlens[ZSWAP_MAX_BATCH_SIZE];
> +	struct page *pages[ZSWAP_MAX_BATCH_SIZE];
> +	unsigned int i, nr_batch_pages;
> +	bool ret = true;
> +
> +	nr_batch_pages = min((unsigned int)(folio_nr_pages(folio) - index), batch_size);
> +
> +	for (i = 0; i < nr_batch_pages; ++i) {
> +		pages[i] = folio_page(folio, index + i);
> +		dlens[i] = PAGE_SIZE;
> +	}
> +
> +	/*
> +	 * Batch compress @nr_batch_pages. If IAA is the compressor, the
> +	 * hardware will compress @nr_batch_pages in parallel.
> +	 */

Please do not specifically mention IAA in zswap.c, as batching could be
supported in the future by other compressors.

> +	ret = crypto_acomp_batch_compress(
> +		acomp_ctx->reqs,
> +		NULL,
> +		pages,
> +		acomp_ctx->buffers,
> +		dlens,
> +		comp_errors,
> +		nr_batch_pages);

Does crypto_acomp_batch_compress() not require calling
crypto_wait_req()?

> +
> +	if (ret) {
> +		/*
> +		 * All batch pages were successfully compressed.
> +		 * Store the pages in zpool.
> +		 */
> +		struct zpool *zpool = pool->zpool;
> +		gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> +
> +		if (zpool_malloc_support_movable(zpool))
> +			gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> +
> +		for (i = 0; i < nr_batch_pages; ++i) {
> +			unsigned long handle;
> +			char *buf;
> +			int err;
> +
> +			err = zpool_malloc(zpool, dlens[i], gfp, &handle);
> +
> +			if (err) {
> +				if (err == -ENOSPC)
> +					zswap_reject_compress_poor++;
> +				else
> +					zswap_reject_alloc_fail++;
> +
> +				ret = false;
> +				break;
> +			}
> +
> +			buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
> +			memcpy(buf, acomp_ctx->buffers[i], dlens[i]);
> +			zpool_unmap_handle(zpool, handle);
> +
> +			entries[i]->handle = handle;
> +			entries[i]->length = dlens[i];
> +		}
> +	} else {
> +		/* Some batch pages had compression errors. */
> +		for (i = 0; i < nr_batch_pages; ++i) {
> +			if (comp_errors[i]) {
> +				if (comp_errors[i] == -ENOSPC)
> +					zswap_reject_compress_poor++;
> +				else
> +					zswap_reject_compress_fail++;
> +			}
> +		}
> +	}

This function is awfully close to zswap_compress(). It's essentially a
vectorized version and uses crypto_acomp_batch_compress() instead of
crypto_acomp_compress().

My questions are:
- Can we use crypto_acomp_batch_compress() for the non-batched case as
  well to unify the code? Does it cause any regressions?

- If we have to use different compression APIs, can we at least reuse
  the rest of the code? We can abstract the compression call into a
  helper that chooses the appropriate API based on the batch size. The
  rest should be the same AFAICT.

> +
> +	return ret;
> +}
> +
>  static bool zswap_compress_folio(struct folio *folio,
>  				 struct zswap_entry *entries[],
>  				 struct zswap_pool *pool)
>  {
>  	long index, nr_pages = folio_nr_pages(folio);
> +	struct crypto_acomp_ctx *acomp_ctx;
> +	unsigned int batch_size;
> +	bool ret = true;
>  
> -	for (index = 0; index < nr_pages; ++index) {
> -		struct page *page = folio_page(folio, index);
> +	acomp_ctx = acomp_ctx_get_cpu_lock(pool);
> +	batch_size = acomp_ctx->nr_reqs;
> +
> +	if ((batch_size > 1) && (nr_pages > 1)) {
> +		for (index = 0; index < nr_pages; index += batch_size) {
> +
> +			if (!zswap_batch_compress(folio, index, batch_size,
> +						  &entries[index], pool, acomp_ctx)) {
> +				ret = false;
> +				goto unlock_acomp_ctx;
> +			}
> +		}
> +	} else {
> +		for (index = 0; index < nr_pages; ++index) {
> +			struct page *page = folio_page(folio, index);
>  
> -		if (!zswap_compress(page, entries[index], pool))
> -			return false;
> +			if (!zswap_compress(page, entries[index], pool, acomp_ctx)) {
> +				ret = false;
> +				goto unlock_acomp_ctx;
> +			}
> +		}
>  	}
>  
> -	return true;
> +unlock_acomp_ctx:
> +	acomp_ctx_put_unlock(acomp_ctx);
> +	return ret;
>  }
>  
>  /*
> -- 
> 2.27.0
>

next prev parent reply	other threads:[~2025-02-06 19:10 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-06  7:20 [PATCH v6 00/16] zswap IAA compress batching Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 01/16] crypto: acomp - Add synchronous/asynchronous acomp request chaining Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 02/16] crypto: acomp - Define new interfaces for compress/decompress batching Kanchana P Sridhar
2025-02-16  5:10   ` Herbert Xu
2025-02-28 10:00     ` Sridhar, Kanchana P
2025-02-06  7:20 ` [PATCH v6 03/16] crypto: iaa - Add an acomp_req flag CRYPTO_ACOMP_REQ_POLL to enable async mode Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 04/16] crypto: iaa - Implement batch_compress(), batch_decompress() API in iaa_crypto Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 05/16] crypto: iaa - Enable async mode and make it the default Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 06/16] crypto: iaa - Disable iaa_verify_compress by default Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 07/16] crypto: iaa - Re-organize the iaa_crypto driver code Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 08/16] crypto: iaa - Map IAA devices/wqs to cores based on packages instead of NUMA Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 09/16] crypto: iaa - Distribute compress jobs from all cores to all IAAs on a package Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 10/16] crypto: iaa - Descriptor allocation timeouts with mitigations in iaa_crypto Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 11/16] crypto: iaa - Fix for "deflate_generic_tfm" global being accessed without locks Kanchana P Sridhar
2025-02-06  7:20 ` [PATCH v6 12/16] mm: zswap: Allocate pool batching resources if the compressor supports batching Kanchana P Sridhar
2025-02-06 18:55   ` Yosry Ahmed
2025-02-28 10:00     ` Sridhar, Kanchana P
2025-02-06  7:20 ` [PATCH v6 13/16] mm: zswap: Restructure & simplify zswap_store() to make it amenable for batching Kanchana P Sridhar
2025-02-06  7:21 ` [PATCH v6 14/16] mm: zswap: Introduce zswap_compress_folio() to compress all pages in a folio Kanchana P Sridhar
2025-02-06  7:21 ` [PATCH v6 15/16] mm: zswap: Compress batching with Intel IAA in zswap_store() of large folios Kanchana P Sridhar
2025-02-06 19:10   ` Yosry Ahmed [this message]
2025-02-06 19:24     ` Sridhar, Kanchana P
2025-02-28 10:00       ` Sridhar, Kanchana P
2025-02-06  7:21 ` [PATCH v6 16/16] mm: zswap: Fix for zstd performance regression with 2M folios Kanchana P Sridhar
2025-02-06 19:15   ` Yosry Ahmed
2025-02-28 10:00     ` Sridhar, Kanchana P
2025-02-20 23:28   ` Nhat Pham
2025-02-21  3:24     ` Sridhar, Kanchana P
2025-02-11 17:05 ` [PATCH v6 00/16] zswap IAA compress batching Eric Biggers
2025-02-11 17:52   ` Nhat Pham

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z6UJKTCkffZ93us5@google.com \
    --to=yosry.ahmed@linux.dev \
    --cc=21cnbao@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=ardb@kernel.org \
    --cc=chengming.zhou@linux.dev \
    --cc=clabbe@baylibre.com \
    --cc=davem@davemloft.net \
    --cc=ebiggers@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=herbert@gondor.apana.org.au \
    --cc=kanchana.p.sridhar@intel.com \
    --cc=kristen.c.accardi@intel.com \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nphamcs@gmail.com \
    --cc=ryan.roberts@arm.com \
    --cc=surenb@google.com \
    --cc=usamaarif642@gmail.com \
    --cc=vinodh.gopal@intel.com \
    --cc=wajdi.k.feghali@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.