Re: [PATCH 2/3] zram: support page-based parallel write

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Minchan Kim <minchan@kernel.org>
To: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2/3] zram: support page-based parallel write
Date: Mon, 24 Oct 2016 13:47:14 +0900	[thread overview]
Message-ID: <20161024044714.GA4938@blaptop> (raw)
In-Reply-To: <20161021060334.GA527@swordfish>

Hi Sergey,

On Fri, Oct 21, 2016 at 03:03:34PM +0900, Sergey Senozhatsky wrote:
> On (09/22/16 15:42), Minchan Kim wrote:
> > +static ssize_t use_aio_store(struct device *dev,
> > +		struct device_attribute *attr, const char *buf, size_t len)
> > +{
> > +	int ret;
> > +	u16 do_async;
> > +	struct zram *zram  = dev_to_zram(dev);
> > +
> > +	ret = kstrtou16(buf, 10, &do_async);
> > +	if (ret)
> > +		return ret;
> > +
> > +	down_write(&zram->init_lock);
> > +	if (init_done(zram)) {
> > +		up_write(&zram->init_lock);
> > +		pr_info("Can't change for initialized device\n");
> > +		return -EBUSY;
> > +	}
> > +
> > +	if (do_async)
> > +		zram->use_aio = true;
> > +	else
> > +		zram->use_aio = false;
> 
> a stupid nitpick:
> 	zram->use_aio = do_async?

Just matter of preference. I don't mind it.
Will use it in next spin.

> 
> > +	up_write(&zram->init_lock);
> > +
> > +	return len;
> > +}
> > +#endif
> > +
> [..]
> > +static void worker_wake_up(void)
> > +{
> > +	/*
> > +	 * Unless it's enough workes to handle accumulated requests,
> > +	 * wake up new workers.
> > +	 */
> > +	if (workers.nr_running * NR_BATCH_PAGES < workers.nr_req) {
> > +		int nr_wakeup = (workers.nr_req + NR_BATCH_PAGES) /
> > +				NR_BATCH_PAGES - workers.nr_running;
> > +
> > +		wake_up_nr(&workers.req_wait, nr_wakeup);
> > +	}
> > +}
> > +
> > +static void zram_unplug(struct blk_plug_cb *cb, bool from_schedule)
> > +{
> > +	spin_lock(&workers.req_lock);
> > +	if (workers.nr_req)
> > +		worker_wake_up();
> > +	spin_unlock(&workers.req_lock);
> > +	kfree(cb);
> > +}
> > +
> > +static int zram_check_plugged(void)
> > +{
> > +	return !!blk_check_plugged(zram_unplug, NULL,
> > +			sizeof(struct blk_plug_cb));
> > +}
> 
> I'm having some troubles understanding the purpose of zram_check_plugged().
> it's a global symbol, can you just use it directly? otherwise we are
> doing additional kmalloc/kfree, spin_lock/unlock and so on.

I don't understnad it. Why does it that use zram_check_plugged directly reduce
count things you mentioned?
> 
> what am I missing? current->plug? can it affect us? how?

Sorry. I can't understand your point.

> 
> 
> > +int queue_page_request(struct zram *zram, struct bio_vec *bvec, u32 index,
> > +                       int offset, bool write)
> 
> static int?

Yeb.

> 
> > +{
> > +       struct page_request *page_req = kmalloc(sizeof(*page_req), GFP_NOIO);
> > +
> > +       if (!page_req)
> > +               return -ENOMEM;
> 
> 
> ...
> 
> 
> > +int queue_page_request_list(struct zram *zram, struct bio_request *bio_req,
> 
> static int?

Yeb.

> 
> > +			struct bio_vec *bvec, u32 index, int offset,
> > +			bool write, struct list_head *page_list)
> > +{
> > +	struct page_request *page_req = kmalloc(sizeof(*page_req), GFP_NOIO);
> > +
> > +	if (!page_req) {
> > +		while (!list_empty(page_list)) {
> > +			page_req = list_first_entry(page_list,
> > +					struct page_request, list);
> > +			list_del(&page_req->list);
> > +			kfree(page_req);
> > +		}
> > +
> > +		return -ENOMEM;
> > +	}
> > +
> > +	page_req->bio_req = bio_req;
> > +	page_req->zram = zram;
> > +	page_req->bvec = *bvec;
> > +	page_req->index = index;
> > +	page_req->offset = offset;
> > +	page_req->write = write;
> > +	atomic_inc(&bio_req->nr_pages);
> > +
> > +	list_add_tail(&page_req->list, page_list);
> > +
> > +	return 0;
> > +}
> > +
> > +/*
> > + * @page_list: pages isolated from request queue
> > + */
> > +static void get_page_requests(struct list_head *page_list)
> > +{
> > +	struct page_request *page_req;
> > +	int nr_pages;
> > +
> > +	for (nr_pages = 0; nr_pages < NR_BATCH_PAGES; nr_pages++) {
> > +		if  (list_empty(&workers.req_list))
> > +			break;
> > +
> > +		page_req = list_first_entry(&workers.req_list,
> > +					struct page_request, list);
> > +		list_move(&page_req->list, page_list);
> > +	}
> > +
> > +	workers.nr_req -= nr_pages;
> > +}
> > +
> > +/*
> > + * move @page_list to request queue and wake up workers if it is necessary.
> > + */
> > +static void run_worker(struct bio *bio, struct list_head *page_list,
> > +			unsigned int nr_pages)
> > +{
> > +	WARN_ON(list_empty(page_list));
> > +
> > +	spin_lock(&workers.req_lock);
> > +	list_splice_tail(page_list, &workers.req_list);
> > +	workers.nr_req += nr_pages;
> > +	if (bio->bi_opf & REQ_SYNC || !zram_check_plugged())
> > +		worker_wake_up();
> > +	spin_unlock(&workers.req_lock);
> > +}
> > +
> > +static bool __zram_make_async_request(struct zram *zram, struct bio *bio)
> > +{
> > +	int offset;
> > +	u32 index;
> > +	struct bio_vec bvec;
> > +	struct bvec_iter iter;
> > +	LIST_HEAD(page_list);
> > +	struct bio_request *bio_req;
> > +	unsigned int nr_pages = 0;
> > +	bool write = op_is_write(bio_op(bio));
> > +
> > +	if (!zram->use_aio || !op_is_write(bio_op(bio)))
> > +		return false;
> > +
> > +	bio_req = kmalloc(sizeof(*bio_req), GFP_NOIO);
> > +	if (!bio_req)
> > +		return false;
> > +
> > +	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
> > +	offset = (bio->bi_iter.bi_sector &
> > +		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
> > +
> > +	bio_req->bio = bio;
> > +	atomic_set(&bio_req->nr_pages, 0);
> > +
> > +	bio_for_each_segment(bvec, bio, iter) {
> > +		int max_transfer_size = PAGE_SIZE - offset;
> > +
> > +		if (bvec.bv_len > max_transfer_size) {
> > +			struct bio_vec bv;
> > +
> > +			bv.bv_page = bvec.bv_page;
> > +			bv.bv_len = max_transfer_size;
> > +			bv.bv_offset = bvec.bv_offset;
> > +
> > +			if (queue_page_request_list(zram, bio_req, &bv,
> > +					index, offset, write, &page_list))
> > +				goto fail;
> > +
> > +			bv.bv_len = bvec.bv_len - max_transfer_size;
> > +			bv.bv_offset += max_transfer_size;
> > +			if (queue_page_request_list(zram, bio_req, &bv,
> > +					index + 1, 0, write, &page_list))
> > +				goto fail;
> > +		} else {
> > +			if (queue_page_request_list(zram, bio_req, &bvec,
> > +					index, offset, write, &page_list))
> > +				goto fail;
> > +		}
> > +
> > +		nr_pages++;
> > +		update_position(&index, &offset, &bvec);
> > +	}
> > +
> > +	run_worker(bio, &page_list, nr_pages);
> > +
> > +	return true;
> > +fail:
> > +	kfree(bio_req);
> > +
> > +	WARN_ON(!list_empty(&page_list));
> > +
> > +	return false;
> > +}
> > +
> > +void page_requests_rw(struct list_head *page_list)
> > +{
> > +	struct page_request *page_req;
> > +	bool write;
> > +	struct page *page;
> > +	struct zram *zram;
> > +	int err;
> > +	bool free_bio;
> > +	struct bio_request *bio_req;
> > +
> > +	while (!list_empty(page_list)) {
> > +		free_bio = false;
> > +		page_req = list_last_entry(page_list, struct page_request,
> > +					list);
> > +		write = page_req->write;
> > +		page = page_req->bvec.bv_page;
> > +		zram = page_req->zram;
> > +		bio_req = page_req->bio_req;
> > +		if (bio_req && atomic_dec_and_test(&bio_req->nr_pages))
> > +			free_bio = true;
> > +		list_del(&page_req->list);
> > +
> > +		err = zram_bvec_rw(zram, &page_req->bvec, page_req->index,
> > +			page_req->offset, page_req->write);
> > +
> > +		kfree(page_req);
> > +
> > +		/* page-based request */
> > +		if (!bio_req) {
> > +			page_endio(page, write, err);
> > +			zram_meta_put(zram);
> 
> who is calling zram_meta_get()? I mean in this loop...
> what if the loop continues after that `if'?

In this loop, anybody doesn't call zram_meta_get.
It should be called by IO queuing routine. !bio_req case above is rw_page thing
so each page_req IO request holds a reference of zram_meta and
in here, service routine release the refernce after IO done.

> 
> > +		/* bio-based request */
> > +		} else if (free_bio) {
> > +			if (likely(!err))
> > +				bio_endio(bio_req->bio);
> > +			else
> > +				bio_io_error(bio_req->bio);
> > +			kfree(bio_req);
> > +			zram_meta_put(zram);
> 
> ditto, who is calling zram_meta_get()?
> what if the loop continue after that `if'?
> 
> 
> we do zram_meta_get() only once in zram_make_request(). but then
> put meta for every request in page_list, while a bio passed to
> __zram_make_async_request() can span across several requests in
> page_list. right?

You're right. bio-based request case should be fixed.

> 
> > +		}
> > +	}
> > +}
> > +
> > +static int zram_thread(void *data)
> > +{
> > +	DEFINE_WAIT(wait);
> > +	LIST_HEAD(page_list);
> > +
> > +	spin_lock(&workers.req_lock);
> > +	workers.nr_running++;
> > +
> > +	while (!kthread_should_stop()) {
> > +		if (list_empty(&workers.req_list)) {
> > +			prepare_to_wait_exclusive(&workers.req_wait, &wait,
> > +					TASK_INTERRUPTIBLE);
> > +			workers.nr_running--;
> > +			spin_unlock(&workers.req_lock);
> > +			schedule();
> > +			spin_lock(&workers.req_lock);
> > +			finish_wait(&workers.req_wait, &wait);
> > +			workers.nr_running++;
> > +			continue;
> > +		}
> > +
> > +		get_page_requests(&page_list);
> > +		if (list_empty(&page_list))
> > +			continue;
> > +
> > +		spin_unlock(&workers.req_lock);
> > +		page_requests_rw(&page_list);
> > +		cond_resched();
> > +		spin_lock(&workers.req_lock);
> > +	}
> > +
> > +	workers.nr_running--;
> > +	spin_unlock(&workers.req_lock);
> > +
> > +	return 0;
> > +}
> > +
> > +static void destroy_workers(void)
> > +{
> > +	struct zram_worker *worker;
> > +
> > +	while (!list_empty(&workers.worker_list)) {
> > +		worker = list_first_entry(&workers.worker_list,
> > +				struct zram_worker,
> > +				list);
> > +		kthread_stop(worker->task);
> > +		list_del(&worker->list);
> > +		kfree(worker);
> > +	}
> > +
> > +	WARN_ON(workers.nr_running);
> > +}
> > +
> > +static int create_workers(void)
> > +{
> > +	int i;
> > +	int nr_cpu = num_online_cpus();
> > +	struct zram_worker *worker;
> > +
> > +	INIT_LIST_HEAD(&workers.worker_list);
> > +	INIT_LIST_HEAD(&workers.req_list);
> > +	spin_lock_init(&workers.req_lock);
> > +	init_waitqueue_head(&workers.req_wait);
> > +
> > +	for (i = 0; i < nr_cpu; i++) {
> > +		worker = kmalloc(sizeof(*worker), GFP_KERNEL);
> > +		if (!worker)
> > +			goto error;
> > +
> > +		worker->task = kthread_run(zram_thread, NULL, "zramd-%d", i);
> > +		if (IS_ERR(worker->task)) {
> > +			kfree(worker);
> > +			goto error;
> > +		}
> > +
> > +		list_add(&worker->list, &workers.worker_list);
> > +	}
> > +
> > +	return 0;
> > +
> > +error:
> > +	destroy_workers();
> > +	return 1;
> > +}
> > +
> > +static int zram_rw_async_page(struct zram *zram,
> > +			struct bio_vec *bv, u32 index, int offset,
> > +			bool is_write)
> > +{
> > +	if (!zram->use_aio || !is_write)
> > +		return 1;
> > +
> > +	return queue_page_request(zram, bv, index, offset, is_write);
> > +}
> > +#else
> > +static bool __zram_make_async_request(struct zram *zram, struct bio *bio)
> > +{
> > +	return false;
> > +}
> > +
> > +static int zram_rw_async_page(struct zram *zram,
> > +			struct bio_vec *bv, u32 index, int offset,
> > +			bool is_write)
> > +{
> > +	return 1;
> > +}
> > +
> > +static int create_workers(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +static void destroy_workers(void)
> > +{
> > +}
> > +#endif
> > +
> >  /*
> >   * Handler function for all zram I/O requests.
> >   */
> > @@ -954,6 +1380,9 @@ static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
> >  		goto out;
> >  	}
> >  
> > +	if (__zram_make_async_request(zram, bio))
> > +		goto out;
> > +
> >  	__zram_make_sync_request(zram, bio);
> >  out:
> >  	return BLK_QC_T_NONE;
> > @@ -1028,8 +1457,12 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
> >  	bv.bv_len = PAGE_SIZE;
> >  	bv.bv_offset = 0;
> >  
> > -	err = zram_rw_sync_page(bdev, zram, &bv, index, offset, is_write);
> > +	err = zram_rw_async_page(zram, &bv, index, offset, is_write);
> > +	if (!err)
> > +		goto out;
> >  
> > +	err = zram_rw_sync_page(bdev, zram, &bv, index, offset, is_write);
> > +out:
> >  	return err;
> >  }
> >  
> > @@ -1066,7 +1499,6 @@ static void zram_reset_device(struct zram *zram)
> >  	/* Reset stats */
> >  	memset(&zram->stats, 0, sizeof(zram->stats));
> >  	zram->disksize = 0;
> > -
> >  	set_capacity(zram->disk, 0);
> >  	part_stat_set_all(&zram->disk->part0, 0);
> >  
> > @@ -1211,6 +1643,9 @@ static DEVICE_ATTR_RW(mem_limit);
> >  static DEVICE_ATTR_RW(mem_used_max);
> >  static DEVICE_ATTR_RW(max_comp_streams);
> >  static DEVICE_ATTR_RW(comp_algorithm);
> > +#ifdef CONFIG_ZRAM_ASYNC_IO
> > +static DEVICE_ATTR_RW(use_aio);
> > +#endif
> 
> hm... no real objection, but exporing this sysfs attr can be very hacky
> and difficult for people...

We have been used sysfs for tune the zram for a long time.
Please suggest ideas if you have better. :)

Thanks for reviewing, Sergey!

next prev parent reply	other threads:[~2016-10-24  4:47 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-22  6:42 [PATCH 1/3] zram: rename IO processing functions Minchan Kim
2016-09-22  6:42 ` [PATCH 2/3] zram: support page-based parallel write Minchan Kim
2016-09-29  3:18   ` Sergey Senozhatsky
2016-09-30  5:52     ` Minchan Kim
2016-10-04  4:43       ` Sergey Senozhatsky
2016-10-04  7:35         ` Minchan Kim
2016-10-05  2:01         ` Minchan Kim
2016-10-06  8:29           ` Sergey Senozhatsky
2016-10-07  6:33             ` Minchan Kim
2016-10-07 18:08               ` Sergey Senozhatsky
2016-10-17  5:04               ` Minchan Kim
2016-10-21  6:08                 ` Sergey Senozhatsky
2016-10-24  4:51                   ` Minchan Kim
2016-10-21  6:03   ` Sergey Senozhatsky
2016-10-24  4:47     ` Minchan Kim [this message]
2016-10-24  5:20       ` Sergey Senozhatsky
2016-10-24  5:58         ` Minchan Kim
2016-10-24  7:23           ` Sergey Senozhatsky
2016-09-22  6:42 ` [PATCH 3/3] zram: adjust the number of zram thread Minchan Kim
2016-10-21  6:23   ` Sergey Senozhatsky
2016-10-24  4:54     ` Minchan Kim
2016-10-24  5:29       ` Sergey Senozhatsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161024044714.GA4938@blaptop \
    --to=minchan@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sergey.senozhatsky.work@gmail.com \
    --cc=sergey.senozhatsky@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.