public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Dongsheng Yang <dongsheng.yang@easystack.cn>
To: Chaitanya Kulkarni <chaitanyak@nvidia.com>,
	"dan.j.williams@intel.com" <dan.j.williams@intel.com>,
	"axboe@kernel.dk" <axboe@kernel.dk>
Cc: "linux-block@vger.kernel.org" <linux-block@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-cxl@vger.kernel.org" <linux-cxl@vger.kernel.org>,
	Dongsheng Yang <dongsheng.yang.linux@gmail.com>
Subject: Re: [PATCH 5/7] cbd: introuce cbd_backend
Date: Wed, 24 Apr 2024 16:36:30 +0800	[thread overview]
Message-ID: <29b01fbc-080a-28da-72c1-5434635b2d3b@easystack.cn> (raw)
In-Reply-To: <5166cbf7-2680-4f84-9dee-aa214862f2a8@nvidia.com>



在 2024/4/24 星期三 下午 1:03, Chaitanya Kulkarni 写道:
>> +
>> +struct cbd_backend_io {
>> +	struct cbd_se		*se;
>> +	u64			off;
>> +	u32			len;
>> +	struct bio		*bio;
>> +	struct cbd_handler	*handler;
>> +};
>> +
> 
> why not use inline bvecs and avoid bio page allocation for reasonable
> size ? instead of performing the allocation for each request ...

Inline bvecs sound good; I will use them in the next version.
> 
>> +static inline void complete_cmd(struct cbd_handler *handler, u64 priv_data, int ret)
>> +{
>> +	struct cbd_ce *ce = get_compr_head(handler);
>> +
>> +	memset(ce, 0, sizeof(*ce));
>> +	ce->priv_data = priv_data;
>> +	ce->result = ret;
>> +	CBDC_UPDATE_COMPR_HEAD(handler->channel_info->compr_head,
>> +			       sizeof(struct cbd_ce),
>> +			       handler->channel_info->compr_size);
>> +
>> +	cbdc_flush_ctrl(&handler->channel);
>> +
>> +	return;
>> +}
>> +
>> +static void backend_bio_end(struct bio *bio)
>> +{
>> +	struct cbd_backend_io *backend_io = bio->bi_private;
>> +	struct cbd_se *se = backend_io->se;
>> +	struct cbd_handler *handler = backend_io->handler;
>> +
>> +	if (bio->bi_status == 0 &&
>> +	    cbd_se_hdr_get_op(se->header.len_op) == CBD_OP_READ) {
>> +		cbdc_copy_from_bio(&handler->channel, se->data_off, se->data_len, bio);
>> +	}
>> +
>> +	complete_cmd(handler, se->priv_data, bio->bi_status);
>> +
>> +	bio_free_pages(bio);
>> +	bio_put(bio);
>> +	kfree(backend_io);
>> +}
>> +
>> +static int cbd_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
>> +{
>> +	int ret = 0;
>> +
>> +        while (size) {
>> +                struct page *page = alloc_pages(gfp_mask, 0);
>> +                unsigned len = min_t(size_t, PAGE_SIZE, size);
> 
> alloc_page() call should be close to below check ..

That's right, it should be alloc_page() rather than alloc_pages() with an
order of 0.
> 
>> +
>> +                if (!page) {
>> +			pr_err("failed to alloc page");
>> +			ret = -ENOMEM;
>> +			break;
>> +		}
>> +
>> +		ret = bio_add_page(bio, page, len, 0);
>> +                if (unlikely(ret != len)) {
>> +                        __free_page(page);
>> +			pr_err("failed to add page");
>> +                        break;
>> +                }
>> +
>> +                size -= len;
>> +        }
>> +
>> +	if (size)
>> +		bio_free_pages(bio);
>> +	else
>> +		ret = 0;
>> +
>> +        return ret;
>> +}
> 
> code formatting seems to be broken for above function plz check..

Thanks for pointing it out.
> 
>> +
>> +static struct cbd_backend_io *backend_prepare_io(struct cbd_handler *handler, struct cbd_se *se, blk_opf_t opf)
>> +{
>> +	struct cbd_backend_io *backend_io;
>> +	struct cbd_backend *cbdb = handler->cbdb;
>> +
>> +	backend_io = kzalloc(sizeof(struct cbd_backend_io), GFP_KERNEL);
> 
> will above allocation always succeed ? or NULL check should be here ?

Sure, it should be checked here. Thanks.
> 
>> +	backend_io->se = se;
>> +
>> +	backend_io->handler = handler;
>> +	backend_io->bio = bio_alloc_bioset(cbdb->bdev, roundup(se->len, 4096) / 4096, opf, GFP_KERNEL, &handler->bioset);
>> +
>> +	backend_io->bio->bi_iter.bi_sector = se->offset >> SECTOR_SHIFT;
>> +	backend_io->bio->bi_iter.bi_size = 0;
>> +	backend_io->bio->bi_private = backend_io;
>> +	backend_io->bio->bi_end_io = backend_bio_end;
>> +
>> +	return backend_io;
>> +}
>> +
>> +static int handle_backend_cmd(struct cbd_handler *handler, struct cbd_se *se)
>> +{
>> +	struct cbd_backend *cbdb = handler->cbdb;
>> +	u32 len = se->len;
>> +	struct cbd_backend_io *backend_io = NULL;
>> +	int ret;
>> +
>> +	if (cbd_se_hdr_flags_test(se, CBD_SE_HDR_DONE)) {
>> +		return 0 ;
>> +	}
>> +
>> +	switch (cbd_se_hdr_get_op(se->header.len_op)) {
>> +	case CBD_OP_PAD:
>> +		cbd_se_hdr_flags_set(se, CBD_SE_HDR_DONE);
>> +		return 0;
>> +	case CBD_OP_READ:
>> +		backend_io = backend_prepare_io(handler, se, REQ_OP_READ);
>> +		break;
>> +	case CBD_OP_WRITE:
>> +		backend_io = backend_prepare_io(handler, se, REQ_OP_WRITE);
>> +		break;
>> +	case CBD_OP_DISCARD:
>> +		ret = blkdev_issue_discard(cbdb->bdev, se->offset >> SECTOR_SHIFT,
>> +				se->len, GFP_NOIO);
> 
> any specific reason to not use GFP_KERNEL ?

Using GFP_NOIO is intended to avoid memory allocation loops in the I/O 
path, but in this case, it's actually handling remote I/O requests, so 
theoretically using GFP_KERNEL should also work.
> 
>> +		goto complete_cmd;
>> +	case CBD_OP_WRITE_ZEROS:
>> +		ret = blkdev_issue_zeroout(cbdb->bdev, se->offset >> SECTOR_SHIFT,
>> +				se->len, GFP_NOIO, 0);
> 
> any specific reason to not use GFP_KERNEL ?

ditto
> 
>> +		goto complete_cmd;
>> +	case CBD_OP_FLUSH:
>> +		ret = blkdev_issue_flush(cbdb->bdev);
>> +		goto complete_cmd;
>> +	default:
>> +		pr_err("unrecognized op: %x", cbd_se_hdr_get_op(se->header.len_op));
>> +		ret = -EIO;
>> +		goto complete_cmd;
>> +	}
>> +
>> +	if (!backend_io)
>> +		return -ENOMEM;
> 
> there is no NULL check in the backend_prepare_io() not sure about
> above condition in current code unless you return NULL ...

backend_prepare_io() should check for NULL :)
> 
>> +
>> +	ret = cbd_bio_alloc_pages(backend_io->bio, len, GFP_NOIO);
>> +	if (ret) {
>> +		kfree(backend_io);
>> +		return ret;
>> +	}
>> +
>> +	if (cbd_se_hdr_get_op(se->header.len_op) == CBD_OP_WRITE) {
>> +		cbdc_copy_to_bio(&handler->channel, se->data_off, se->data_len, backend_io->bio);
>> +	}
>> +
>> +	submit_bio(backend_io->bio);
>> +
> 
> unless I didn't understand the code, you are building a single bio from
> incoming request, that might not have enough space to accommodate all
> the data from incoming request, hence you are returning an error from
> cbd_bio_alloc_pages() when bio_add_page() fail ...
> 
> bio_add_page() can fail for multiple reasons, instead of trying to
> build only one bio that might be smaller for the size of the I/O and
> returning error, why not use the chain of the small size bios ? that
> way you will not run out of the space in single bio and still finish
> the I/O by avoiding bio_add_page() failure that might happen due to
> bio full ?

"bio_add_page" should only return an error when "bio->bi_vcnt >= 
bio->bi_max_vecs". However, in our case, "bi_max_vecs" is calculated 
when "bio_alloc_bioset" is called, so "bi_vcnt" should not exceed 
"bi_max_vecs". In other words, theoretically, "bio_add_page" should not 
fail here.
> 
>> +	return 0;
>> +
>> +complete_cmd:
>> +	complete_cmd(handler, se->priv_data, ret);
>> +	return 0;
>> +}
>> +
>> +static void handle_work_fn(struct work_struct *work)
>> +{
>> +	struct cbd_handler *handler = container_of(work, struct cbd_handler, handle_work.work);
>> +	struct cbd_se *se;
>> +	int ret;
>> +again:
>> +	/* channel ctrl would be updated by blkdev queue */
>> +	cbdc_flush_ctrl(&handler->channel);
>> +	se = get_se_to_handle(handler);
>> +	if (se == get_se_head(handler)) {
>> +		if (cbdwc_need_retry(&handler->handle_worker_cfg)) {
>> +			goto again;
>> +		}
>> +
>> +		cbdwc_miss(&handler->handle_worker_cfg);
>> +
>> +		queue_delayed_work(handler->handle_wq, &handler->handle_work, usecs_to_jiffies(0));
>> +		return;
>> +	}
>> +
>> +	cbdwc_hit(&handler->handle_worker_cfg);
>> +	cbdt_flush_range(handler->cbdb->cbdt, se, sizeof(*se));
>> +	ret = handle_backend_cmd(handler, se);
>> +	if (!ret) {
>> +		/* this se is handled */
>> +		handler->se_to_handle = (handler->se_to_handle + cbd_se_hdr_get_len(se->header.len_op)) % handler->channel_info->cmdr_size;
> 
> this is a really long line, if possible keep code under 80 char, I know
> it's not a requirement anymore but it will match block drivers ..

That's indeed long. I'll try to make it more concise in the next version.

Kulkarni, thanks for your review; every comment helps :)

Thanx
> 
> -ck
> 
> 

  reply	other threads:[~2024-04-24  8:36 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-22  7:15 [PATCH RFC 0/7] block: Introduce CBD (CXL Block Device) Dongsheng Yang
2024-04-22  7:16 ` [PATCH 1/7] block: Init for CBD(CXL " Dongsheng Yang
2024-04-22 18:39   ` Randy Dunlap
2024-04-22 22:41     ` Dongsheng Yang
2024-04-24  3:58   ` Chaitanya Kulkarni
2024-04-24  8:36     ` Dongsheng Yang
2024-04-22  7:16 ` [PATCH 2/7] cbd: introduce cbd_transport Dongsheng Yang
2024-04-24  4:08   ` Chaitanya Kulkarni
2024-04-24  8:43     ` Dongsheng Yang
2024-04-22  7:16 ` [PATCH 3/7] cbd: introduce cbd_channel Dongsheng Yang
2024-04-22  7:16 ` [PATCH 4/7] cbd: introduce cbd_host Dongsheng Yang
2024-04-25  5:51   ` [EXTERNAL] " Bharat Bhushan
2024-04-22  7:16 ` [PATCH 5/7] cbd: introuce cbd_backend Dongsheng Yang
2024-04-24  5:03   ` Chaitanya Kulkarni
2024-04-24  8:36     ` Dongsheng Yang [this message]
2024-04-25  5:46   ` [EXTERNAL] " Bharat Bhushan
2024-04-22  7:16 ` [PATCH 7/7] cbd: add related sysfs files in transport register Dongsheng Yang
2024-04-25  5:24   ` [EXTERNAL] " Bharat Bhushan
2024-04-22 22:42 ` [PATCH 6/7] cbd: introduce cbd_blkdev Dongsheng Yang
2024-04-23  7:27   ` Dongsheng Yang
2024-04-24  4:29 ` [PATCH RFC 0/7] block: Introduce CBD (CXL Block Device) Dan Williams
2024-04-24  6:33   ` Dongsheng Yang
2024-04-24 15:14     ` Gregory Price
2024-04-26  1:25       ` Dongsheng Yang
2024-04-26 13:48         ` Gregory Price
2024-04-26 14:53           ` Dongsheng Yang
2024-04-26 16:14             ` Gregory Price
2024-04-28  5:47               ` Dongsheng Yang
2024-04-28 16:44                 ` Gregory Price
2024-04-28 16:55                 ` John Groves
2024-05-03  9:52                   ` Jonathan Cameron
2024-05-08 11:39                     ` Dongsheng Yang
2024-05-08 12:11                       ` Jonathan Cameron
2024-05-08 13:03                         ` Dongsheng Yang
2024-05-08 15:44                           ` Jonathan Cameron
2024-05-09 11:24                             ` Dongsheng Yang
2024-05-09 12:21                               ` Jonathan Cameron
2024-05-09 13:03                                 ` Dongsheng Yang
2024-05-21 18:41                                   ` Dan Williams
2024-05-22  6:17                                     ` Dongsheng Yang
2024-05-29 15:25                                       ` Gregory Price
2024-05-30  6:59                                         ` Dongsheng Yang
2024-05-30 13:38                                           ` Jonathan Cameron
2024-06-01  3:22                                             ` Dan Williams
2024-06-03 12:48                                               ` Jonathan Cameron
2024-06-03 17:28                                                 ` James Morse
2024-06-04 14:26                                                   ` Jonathan Cameron
2024-05-31 14:23                                           ` Gregory Price
2024-06-03  1:33                                             ` Dongsheng Yang
2024-04-30  0:34                 ` Dan Williams
2024-04-24 18:08     ` Dan Williams
     [not found]       ` <539c1323-68f9-d753-a102-692b69049c20@easystack.cn>
2024-04-30  0:10         ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=29b01fbc-080a-28da-72c1-5434635b2d3b@easystack.cn \
    --to=dongsheng.yang@easystack.cn \
    --cc=axboe@kernel.dk \
    --cc=chaitanyak@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dongsheng.yang.linux@gmail.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox