From: David Sterba <dsterba@suse.cz>
To: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Cc: clm@fb.com, josef@toxicpanda.com, dsterba@suse.com,
herbert@gondor.apana.org.au, linux-btrfs@vger.kernel.org,
linux-crypto@vger.kernel.org, qat-linux@intel.com, embg@meta.com,
cyan@meta.com, brian.will@intel.com, weigang.li@intel.com
Subject: Re: [RFC PATCH 6/6] btrfs: zlib: add support for zlib-deflate through acomp
Date: Mon, 29 Apr 2024 17:57:08 +0200 [thread overview]
Message-ID: <20240429155708.GF2585@twin.jikos.cz> (raw)
In-Reply-To: <20240426110941.5456-7-giovanni.cabiddu@intel.com>
On Fri, Apr 26, 2024 at 11:54:29AM +0100, Giovanni Cabiddu wrote:
> From: Weigang Li <weigang.li@intel.com>
> +static int acomp_comp_pages(struct address_space *mapping, u64 start,
> + unsigned long len, struct page **pages,
> + unsigned long *out_pages,
> + unsigned long *total_in,
> + unsigned long *total_out)
> +{
> + unsigned int nr_src_pages = 0, nr_dst_pages = 0, nr_pages = 0;
> + struct sg_table in_sg = { 0 }, out_sg = { 0 };
> + struct page *in_page, *out_page, **in_pages;
> + struct crypto_acomp *tfm = NULL;
> + struct acomp_req *req = NULL;
> + struct crypto_wait wait;
> + int ret, i;
> +
> + nr_src_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
> + in_pages = kcalloc(nr_src_pages, sizeof(struct page *), GFP_KERNEL);
The maximum length is bounded so you could store the in_pages array in
zlib's workspace.
> + if (!in_pages) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + for (i = 0; i < nr_src_pages; i++) {
> + in_page = find_get_page(mapping, start >> PAGE_SHIFT);
> + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
Output pages should be newly allocated by btrfs_alloc_compr_folio()
> + if (!in_page || !out_page) {
> + ret = -ENOMEM;
> + goto out;
> + }
> + in_pages[i] = in_page;
> + pages[i] = out_page;
> + nr_dst_pages += 1;
> + start += PAGE_SIZE;
> + }
> +
> + ret = sg_alloc_table_from_pages(&in_sg, in_pages, nr_src_pages, 0,
> + nr_src_pages << PAGE_SHIFT, GFP_KERNEL);
I'm not sure if the sg interface allows using an existing buffer, but
the input parameters are bounded in size and count, so the allocation
should be dropped and replaced by workspace data.
> + if (ret)
> + goto out;
> +
> + ret = sg_alloc_table_from_pages(&out_sg, pages, nr_dst_pages, 0,
> + nr_dst_pages << PAGE_SHIFT, GFP_KERNEL);
> + if (ret)
> + goto out;
> +
> + crypto_init_wait(&wait);
> + tfm = crypto_alloc_acomp("zlib-deflate", 0, 0);
AFAIK the TFM should be allocated only once, well before any IO is done,
and then reused. Allocating it here can trigger resolving the best
implementation or maybe even module loading.
> + if (IS_ERR(tfm)) {
> + ret = PTR_ERR(tfm);
> + goto out;
> + }
> +
> + req = acomp_request_alloc(tfm);
The request should be in the workspace; the only initialization I see is
setting the right ->tfm pointer.
> + if (!req) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + acomp_request_set_params(req, in_sg.sgl, out_sg.sgl, len,
> + nr_dst_pages << PAGE_SHIFT);
> + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
> + crypto_req_done, &wait);
> +
> + ret = crypto_wait_req(crypto_acomp_compress(req), &wait);
> + if (ret)
> + goto out;
> +
> + *total_in = len;
> + *total_out = req->dlen;
> + nr_pages = (*total_out + PAGE_SIZE - 1) >> PAGE_SHIFT;
> +
> +out:
> + sg_free_table(&in_sg);
> + sg_free_table(&out_sg);
> +
> + if (in_pages) {
> + for (i = 0; i < nr_src_pages; i++)
> + put_page(in_pages[i]);
> + kfree(in_pages);
Pages should be returned back to the pool by btrfs_free_compr_folio().
> + }
> +
> + /* free un-used out pages */
> + for (i = nr_pages; i < nr_dst_pages; i++)
> + put_page(pages[i]);
> +
> + if (req)
> + acomp_request_free(req);
> +
> + if (tfm)
> + crypto_free_acomp(tfm);
> +
> + *out_pages = nr_pages;
> +
> + return ret;
> +}
> +
> +static int acomp_zlib_decomp_bio(struct page **in_pages,
> + struct compressed_bio *cb, size_t srclen,
> + unsigned long total_pages_in)
> +{
> + unsigned int nr_dst_pages = BTRFS_MAX_COMPRESSED_PAGES;
> + struct sg_table in_sg = { 0 }, out_sg = { 0 };
> + struct bio *orig_bio = &cb->orig_bbio->bio;
> + char *data_out = NULL, *bv_buf = NULL;
> + int copy_len = 0, bytes_left = 0;
> + struct crypto_acomp *tfm = NULL;
> + struct page **out_pages = NULL;
> + struct acomp_req *req = NULL;
> + struct crypto_wait wait;
> + struct bio_vec bvec;
> + int ret, i = 0;
> +
> + ret = sg_alloc_table_from_pages(&in_sg, in_pages, total_pages_in,
> + 0, srclen, GFP_KERNEL);
Any allocation here needs to be GFP_NOFS for now. Actually we'd need
memalloc_nofs_save/memalloc_nofs_restore around all compression and
decompression code that does not use GFP_NOFS directly and could call
other APIs that do GFP_KERNEL allocations, like crypto or sg.
> + if (ret)
> + goto out;
> +
> + out_pages = kcalloc(nr_dst_pages, sizeof(struct page *), GFP_KERNEL);
> + if (!out_pages) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + for (i = 0; i < nr_dst_pages; i++) {
> + out_pages[i] = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
> + if (!out_pages[i]) {
> + ret = -ENOMEM;
> + goto out;
> + }
> + }
> +
> + ret = sg_alloc_table_from_pages(&out_sg, out_pages, nr_dst_pages, 0,
> + nr_dst_pages << PAGE_SHIFT, GFP_KERNEL);
> + if (ret)
> + goto out;
> +
> + crypto_init_wait(&wait);
> + tfm = crypto_alloc_acomp("zlib-deflate", 0, 0);
> + if (IS_ERR(tfm)) {
> + ret = PTR_ERR(tfm);
> + goto out;
> + }
> +
> + req = acomp_request_alloc(tfm);
> + if (!req) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + acomp_request_set_params(req, in_sg.sgl, out_sg.sgl, srclen,
> + nr_dst_pages << PAGE_SHIFT);
> + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
> + crypto_req_done, &wait);
> +
> + ret = crypto_wait_req(crypto_acomp_decompress(req), &wait);
> + if (ret)
> + goto out;
> +
> + /* Copy decompressed buffer to bio pages */
> + bytes_left = req->dlen;
> + for (i = 0; i < nr_dst_pages; i++) {
> + copy_len = bytes_left > PAGE_SIZE ? PAGE_SIZE : bytes_left;
> + data_out = kmap_local_page(out_pages[i]);
> +
> + bvec = bio_iter_iovec(orig_bio, orig_bio->bi_iter);
> + bv_buf = kmap_local_page(bvec.bv_page);
> + memcpy(bv_buf, data_out, copy_len);
> + kunmap_local(bv_buf);
> +
> + bio_advance(orig_bio, copy_len);
> + if (!orig_bio->bi_iter.bi_size)
> + break;
> + bytes_left -= copy_len;
> + if (bytes_left <= 0)
> + break;
> + }
> +out:
> + sg_free_table(&in_sg);
> + sg_free_table(&out_sg);
> +
> + if (out_pages) {
> + for (i = 0; i < nr_dst_pages; i++) {
> + if (out_pages[i])
> + put_page(out_pages[i]);
> + }
> + kfree(out_pages);
> + }
> +
> + if (req)
> + acomp_request_free(req);
> + if (tfm)
> + crypto_free_acomp(tfm);
> +
> + return ret;
> +}
> +
> struct list_head *zlib_get_workspace(unsigned int level)
> {
> struct list_head *ws = btrfs_get_workspace(BTRFS_COMPRESS_ZLIB, level);
> @@ -108,6 +305,15 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
> unsigned long nr_dest_pages = *out_pages;
> const unsigned long max_out = nr_dest_pages * PAGE_SIZE;
>
> + if (crypto_has_acomp("zlib-deflate", 0, 0)) {
> + ret = acomp_comp_pages(mapping, start, len, pages, out_pages,
> + total_in, total_out);
> + if (!ret)
> + return ret;
> +
> + pr_warn("BTRFS: acomp compression failed: ret = %d\n", ret);
> + /* Fallback to SW implementation if HW compression failed */
> + }
> *out_pages = 0;
> *total_out = 0;
> *total_in = 0;
> @@ -281,6 +487,16 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
> unsigned long buf_start;
> struct page **pages_in = cb->compressed_pages;
>
> + if (crypto_has_acomp("zlib-deflate", 0, 0)) {
> + ret = acomp_zlib_decomp_bio(pages_in, cb, srclen,
> + total_pages_in);
> + if (!ret)
> + return ret;
> +
> + pr_warn("BTRFS: acomp decompression failed, ret=%d\n", ret);
> + /* Fallback to SW implementation if HW decompression failed */
> + }
> +
> data_in = kmap_local_page(pages_in[page_in_index]);
> workspace->strm.next_in = data_in;
> workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);
> --
> 2.44.0
>
prev parent reply other threads:[~2024-04-29 16:04 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-26 10:54 [RFC PATCH 0/6] btrfs: offload zlib-deflate to accelerators Giovanni Cabiddu
2024-04-26 10:54 ` [RFC PATCH 1/6] Revert "crypto: testmgr - Remove zlib-deflate" Giovanni Cabiddu
2024-04-26 10:54 ` [RFC PATCH 2/6] Revert "crypto: deflate " Giovanni Cabiddu
2024-04-26 10:54 ` [RFC PATCH 3/6] Revert "crypto: qat " Giovanni Cabiddu
2024-04-26 10:54 ` [RFC PATCH 4/6] Revert "crypto: qat - remove unused macros in qat_comp_alg.c" Giovanni Cabiddu
2024-04-26 10:54 ` [RFC PATCH 5/6] crypto: qat - change compressor settings for QAT GEN4 Giovanni Cabiddu
2024-04-26 10:54 ` [RFC PATCH 6/6] btrfs: zlib: add support for zlib-deflate through acomp Giovanni Cabiddu
2024-04-29 13:56 ` Josef Bacik
2024-04-29 15:21 ` Cabiddu, Giovanni
2024-04-29 15:44 ` David Sterba
2024-05-03 10:04 ` Herbert Xu
2024-04-29 15:41 ` David Sterba
2025-05-06 15:38 ` Cabiddu, Giovanni
2025-05-07 2:23 ` Herbert Xu
2025-05-07 12:17 ` David Sterba
2025-05-08 4:19 ` Eric Biggers
2025-05-12 17:52 ` David Sterba
2025-05-27 2:32 ` Gao Xiang
2025-05-27 2:45 ` Gao Xiang
2025-05-27 11:17 ` David Sterba
2025-05-27 12:08 ` Gao Xiang
2025-05-07 12:43 ` David Sterba
2025-05-07 13:12 ` Cabiddu, Giovanni
2024-04-29 15:57 ` David Sterba [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240429155708.GF2585@twin.jikos.cz \
--to=dsterba@suse.cz \
--cc=brian.will@intel.com \
--cc=clm@fb.com \
--cc=cyan@meta.com \
--cc=dsterba@suse.com \
--cc=embg@meta.com \
--cc=giovanni.cabiddu@intel.com \
--cc=herbert@gondor.apana.org.au \
--cc=josef@toxicpanda.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-crypto@vger.kernel.org \
--cc=qat-linux@intel.com \
--cc=weigang.li@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox