From: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
To: Minchan Kim <minchan@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
Sergey Senozhatsky <sergey.senozhatsky@gmail.com>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2/3] zram: support page-based parallel write
Date: Fri, 21 Oct 2016 15:03:34 +0900 [thread overview]
Message-ID: <20161021060334.GA527@swordfish> (raw)
In-Reply-To: <1474526565-6676-2-git-send-email-minchan@kernel.org>
On (09/22/16 15:42), Minchan Kim wrote:
> +static ssize_t use_aio_store(struct device *dev,
> + struct device_attribute *attr, const char *buf, size_t len)
> +{
> + int ret;
> + u16 do_async;
> + struct zram *zram = dev_to_zram(dev);
> +
> + ret = kstrtou16(buf, 10, &do_async);
> + if (ret)
> + return ret;
> +
> + down_write(&zram->init_lock);
> + if (init_done(zram)) {
> + up_write(&zram->init_lock);
> + pr_info("Can't change for initialized device\n");
> + return -EBUSY;
> + }
> +
> + if (do_async)
> + zram->use_aio = true;
> + else
> + zram->use_aio = false;
a stupid nitpick:
zram->use_aio = do_async?
> + up_write(&zram->init_lock);
> +
> + return len;
> +}
> +#endif
> +
[..]
> +static void worker_wake_up(void)
> +{
> + /*
> + * Unless it's enough workes to handle accumulated requests,
> + * wake up new workers.
> + */
> + if (workers.nr_running * NR_BATCH_PAGES < workers.nr_req) {
> + int nr_wakeup = (workers.nr_req + NR_BATCH_PAGES) /
> + NR_BATCH_PAGES - workers.nr_running;
> +
> + wake_up_nr(&workers.req_wait, nr_wakeup);
> + }
> +}
> +
> +static void zram_unplug(struct blk_plug_cb *cb, bool from_schedule)
> +{
> + spin_lock(&workers.req_lock);
> + if (workers.nr_req)
> + worker_wake_up();
> + spin_unlock(&workers.req_lock);
> + kfree(cb);
> +}
> +
> +static int zram_check_plugged(void)
> +{
> + return !!blk_check_plugged(zram_unplug, NULL,
> + sizeof(struct blk_plug_cb));
> +}
I'm having some troubles understanding the purpose of zram_check_plugged().
it's a global symbol, can you just use it directly? otherwise we are
doing additional kmalloc/kfree, spin_lock/unlock and so on.
what am I missing? current->plug? can it affect us? how?
> +int queue_page_request(struct zram *zram, struct bio_vec *bvec, u32 index,
> + int offset, bool write)
static int?
> +{
> + struct page_request *page_req = kmalloc(sizeof(*page_req), GFP_NOIO);
> +
> + if (!page_req)
> + return -ENOMEM;
...
> +int queue_page_request_list(struct zram *zram, struct bio_request *bio_req,
static int?
> + struct bio_vec *bvec, u32 index, int offset,
> + bool write, struct list_head *page_list)
> +{
> + struct page_request *page_req = kmalloc(sizeof(*page_req), GFP_NOIO);
> +
> + if (!page_req) {
> + while (!list_empty(page_list)) {
> + page_req = list_first_entry(page_list,
> + struct page_request, list);
> + list_del(&page_req->list);
> + kfree(page_req);
> + }
> +
> + return -ENOMEM;
> + }
> +
> + page_req->bio_req = bio_req;
> + page_req->zram = zram;
> + page_req->bvec = *bvec;
> + page_req->index = index;
> + page_req->offset = offset;
> + page_req->write = write;
> + atomic_inc(&bio_req->nr_pages);
> +
> + list_add_tail(&page_req->list, page_list);
> +
> + return 0;
> +}
> +
> +/*
> + * @page_list: pages isolated from request queue
> + */
> +static void get_page_requests(struct list_head *page_list)
> +{
> + struct page_request *page_req;
> + int nr_pages;
> +
> + for (nr_pages = 0; nr_pages < NR_BATCH_PAGES; nr_pages++) {
> + if (list_empty(&workers.req_list))
> + break;
> +
> + page_req = list_first_entry(&workers.req_list,
> + struct page_request, list);
> + list_move(&page_req->list, page_list);
> + }
> +
> + workers.nr_req -= nr_pages;
> +}
> +
> +/*
> + * move @page_list to request queue and wake up workers if it is necessary.
> + */
> +static void run_worker(struct bio *bio, struct list_head *page_list,
> + unsigned int nr_pages)
> +{
> + WARN_ON(list_empty(page_list));
> +
> + spin_lock(&workers.req_lock);
> + list_splice_tail(page_list, &workers.req_list);
> + workers.nr_req += nr_pages;
> + if (bio->bi_opf & REQ_SYNC || !zram_check_plugged())
> + worker_wake_up();
> + spin_unlock(&workers.req_lock);
> +}
> +
> +static bool __zram_make_async_request(struct zram *zram, struct bio *bio)
> +{
> + int offset;
> + u32 index;
> + struct bio_vec bvec;
> + struct bvec_iter iter;
> + LIST_HEAD(page_list);
> + struct bio_request *bio_req;
> + unsigned int nr_pages = 0;
> + bool write = op_is_write(bio_op(bio));
> +
> + if (!zram->use_aio || !op_is_write(bio_op(bio)))
> + return false;
> +
> + bio_req = kmalloc(sizeof(*bio_req), GFP_NOIO);
> + if (!bio_req)
> + return false;
> +
> + index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
> + offset = (bio->bi_iter.bi_sector &
> + (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
> +
> + bio_req->bio = bio;
> + atomic_set(&bio_req->nr_pages, 0);
> +
> + bio_for_each_segment(bvec, bio, iter) {
> + int max_transfer_size = PAGE_SIZE - offset;
> +
> + if (bvec.bv_len > max_transfer_size) {
> + struct bio_vec bv;
> +
> + bv.bv_page = bvec.bv_page;
> + bv.bv_len = max_transfer_size;
> + bv.bv_offset = bvec.bv_offset;
> +
> + if (queue_page_request_list(zram, bio_req, &bv,
> + index, offset, write, &page_list))
> + goto fail;
> +
> + bv.bv_len = bvec.bv_len - max_transfer_size;
> + bv.bv_offset += max_transfer_size;
> + if (queue_page_request_list(zram, bio_req, &bv,
> + index + 1, 0, write, &page_list))
> + goto fail;
> + } else {
> + if (queue_page_request_list(zram, bio_req, &bvec,
> + index, offset, write, &page_list))
> + goto fail;
> + }
> +
> + nr_pages++;
> + update_position(&index, &offset, &bvec);
> + }
> +
> + run_worker(bio, &page_list, nr_pages);
> +
> + return true;
> +fail:
> + kfree(bio_req);
> +
> + WARN_ON(!list_empty(&page_list));
> +
> + return false;
> +}
> +
> +void page_requests_rw(struct list_head *page_list)
> +{
> + struct page_request *page_req;
> + bool write;
> + struct page *page;
> + struct zram *zram;
> + int err;
> + bool free_bio;
> + struct bio_request *bio_req;
> +
> + while (!list_empty(page_list)) {
> + free_bio = false;
> + page_req = list_last_entry(page_list, struct page_request,
> + list);
> + write = page_req->write;
> + page = page_req->bvec.bv_page;
> + zram = page_req->zram;
> + bio_req = page_req->bio_req;
> + if (bio_req && atomic_dec_and_test(&bio_req->nr_pages))
> + free_bio = true;
> + list_del(&page_req->list);
> +
> + err = zram_bvec_rw(zram, &page_req->bvec, page_req->index,
> + page_req->offset, page_req->write);
> +
> + kfree(page_req);
> +
> + /* page-based request */
> + if (!bio_req) {
> + page_endio(page, write, err);
> + zram_meta_put(zram);
who is calling zram_meta_get()? I mean in this loop...
what if the loop continues after that `if'?
> + /* bio-based request */
> + } else if (free_bio) {
> + if (likely(!err))
> + bio_endio(bio_req->bio);
> + else
> + bio_io_error(bio_req->bio);
> + kfree(bio_req);
> + zram_meta_put(zram);
ditto, who is calling zram_meta_get()?
what if the loop continue after that `if'?
we do zram_meta_get() only once in zram_make_request(). but then
put meta for every request in page_list, while a bio passed to
__zram_make_async_request() can span across several requests in
page_list. right?
> + }
> + }
> +}
> +
> +static int zram_thread(void *data)
> +{
> + DEFINE_WAIT(wait);
> + LIST_HEAD(page_list);
> +
> + spin_lock(&workers.req_lock);
> + workers.nr_running++;
> +
> + while (!kthread_should_stop()) {
> + if (list_empty(&workers.req_list)) {
> + prepare_to_wait_exclusive(&workers.req_wait, &wait,
> + TASK_INTERRUPTIBLE);
> + workers.nr_running--;
> + spin_unlock(&workers.req_lock);
> + schedule();
> + spin_lock(&workers.req_lock);
> + finish_wait(&workers.req_wait, &wait);
> + workers.nr_running++;
> + continue;
> + }
> +
> + get_page_requests(&page_list);
> + if (list_empty(&page_list))
> + continue;
> +
> + spin_unlock(&workers.req_lock);
> + page_requests_rw(&page_list);
> + cond_resched();
> + spin_lock(&workers.req_lock);
> + }
> +
> + workers.nr_running--;
> + spin_unlock(&workers.req_lock);
> +
> + return 0;
> +}
> +
> +static void destroy_workers(void)
> +{
> + struct zram_worker *worker;
> +
> + while (!list_empty(&workers.worker_list)) {
> + worker = list_first_entry(&workers.worker_list,
> + struct zram_worker,
> + list);
> + kthread_stop(worker->task);
> + list_del(&worker->list);
> + kfree(worker);
> + }
> +
> + WARN_ON(workers.nr_running);
> +}
> +
> +static int create_workers(void)
> +{
> + int i;
> + int nr_cpu = num_online_cpus();
> + struct zram_worker *worker;
> +
> + INIT_LIST_HEAD(&workers.worker_list);
> + INIT_LIST_HEAD(&workers.req_list);
> + spin_lock_init(&workers.req_lock);
> + init_waitqueue_head(&workers.req_wait);
> +
> + for (i = 0; i < nr_cpu; i++) {
> + worker = kmalloc(sizeof(*worker), GFP_KERNEL);
> + if (!worker)
> + goto error;
> +
> + worker->task = kthread_run(zram_thread, NULL, "zramd-%d", i);
> + if (IS_ERR(worker->task)) {
> + kfree(worker);
> + goto error;
> + }
> +
> + list_add(&worker->list, &workers.worker_list);
> + }
> +
> + return 0;
> +
> +error:
> + destroy_workers();
> + return 1;
> +}
> +
> +static int zram_rw_async_page(struct zram *zram,
> + struct bio_vec *bv, u32 index, int offset,
> + bool is_write)
> +{
> + if (!zram->use_aio || !is_write)
> + return 1;
> +
> + return queue_page_request(zram, bv, index, offset, is_write);
> +}
> +#else
> +static bool __zram_make_async_request(struct zram *zram, struct bio *bio)
> +{
> + return false;
> +}
> +
> +static int zram_rw_async_page(struct zram *zram,
> + struct bio_vec *bv, u32 index, int offset,
> + bool is_write)
> +{
> + return 1;
> +}
> +
> +static int create_workers(void)
> +{
> + return 0;
> +}
> +
> +static void destroy_workers(void)
> +{
> +}
> +#endif
> +
> /*
> * Handler function for all zram I/O requests.
> */
> @@ -954,6 +1380,9 @@ static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
> goto out;
> }
>
> + if (__zram_make_async_request(zram, bio))
> + goto out;
> +
> __zram_make_sync_request(zram, bio);
> out:
> return BLK_QC_T_NONE;
> @@ -1028,8 +1457,12 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
> bv.bv_len = PAGE_SIZE;
> bv.bv_offset = 0;
>
> - err = zram_rw_sync_page(bdev, zram, &bv, index, offset, is_write);
> + err = zram_rw_async_page(zram, &bv, index, offset, is_write);
> + if (!err)
> + goto out;
>
> + err = zram_rw_sync_page(bdev, zram, &bv, index, offset, is_write);
> +out:
> return err;
> }
>
> @@ -1066,7 +1499,6 @@ static void zram_reset_device(struct zram *zram)
> /* Reset stats */
> memset(&zram->stats, 0, sizeof(zram->stats));
> zram->disksize = 0;
> -
> set_capacity(zram->disk, 0);
> part_stat_set_all(&zram->disk->part0, 0);
>
> @@ -1211,6 +1643,9 @@ static DEVICE_ATTR_RW(mem_limit);
> static DEVICE_ATTR_RW(mem_used_max);
> static DEVICE_ATTR_RW(max_comp_streams);
> static DEVICE_ATTR_RW(comp_algorithm);
> +#ifdef CONFIG_ZRAM_ASYNC_IO
> +static DEVICE_ATTR_RW(use_aio);
> +#endif
hm... no real objection, but exporing this sysfs attr can be very hacky
and difficult for people...
-ss
> static struct attribute *zram_disk_attrs[] = {
> &dev_attr_disksize.attr,
> @@ -1231,6 +1666,9 @@ static struct attribute *zram_disk_attrs[] = {
> &dev_attr_mem_used_max.attr,
> &dev_attr_max_comp_streams.attr,
> &dev_attr_comp_algorithm.attr,
> +#ifdef CONFIG_ZRAM_ASYNC_IO
> + &dev_attr_use_aio.attr,
> +#endif
> &dev_attr_io_stat.attr,
> &dev_attr_mm_stat.attr,
> &dev_attr_debug_stat.attr,
> @@ -1261,7 +1699,9 @@ static int zram_add(void)
> device_id = ret;
>
> init_rwsem(&zram->init_lock);
> -
> +#ifdef CONFIG_ZRAM_ASYNC_IO
> + zram->use_aio = true;
> +#endif
> queue = blk_alloc_queue(GFP_KERNEL);
> if (!queue) {
> pr_err("Error allocating disk queue for device %d\n",
> @@ -1485,6 +1925,9 @@ static int __init zram_init(void)
> num_devices--;
> }
>
> + if (create_workers())
> + goto out_error;
> +
> return 0;
>
> out_error:
> @@ -1495,6 +1938,7 @@ static int __init zram_init(void)
> static void __exit zram_exit(void)
> {
> destroy_devices();
> + destroy_workers();
> }
>
> module_init(zram_init);
> diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
> index 74fcf10da374..3f62501f619f 100644
> --- a/drivers/block/zram/zram_drv.h
> +++ b/drivers/block/zram/zram_drv.h
> @@ -119,5 +119,8 @@ struct zram {
> * zram is claimed so open request will be failed
> */
> bool claim; /* Protected by bdev->bd_mutex */
> +#ifdef CONFIG_ZRAM_ASYNC_IO
> + bool use_aio;
> +#endif
> };
> #endif
> --
> 2.7.4
>
next prev parent reply other threads:[~2016-10-21 6:03 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-09-22 6:42 [PATCH 1/3] zram: rename IO processing functions Minchan Kim
2016-09-22 6:42 ` [PATCH 2/3] zram: support page-based parallel write Minchan Kim
2016-09-29 3:18 ` Sergey Senozhatsky
2016-09-30 5:52 ` Minchan Kim
2016-10-04 4:43 ` Sergey Senozhatsky
2016-10-04 7:35 ` Minchan Kim
2016-10-05 2:01 ` Minchan Kim
2016-10-06 8:29 ` Sergey Senozhatsky
2016-10-07 6:33 ` Minchan Kim
2016-10-07 18:08 ` Sergey Senozhatsky
2016-10-17 5:04 ` Minchan Kim
2016-10-21 6:08 ` Sergey Senozhatsky
2016-10-24 4:51 ` Minchan Kim
2016-10-21 6:03 ` Sergey Senozhatsky [this message]
2016-10-24 4:47 ` Minchan Kim
2016-10-24 5:20 ` Sergey Senozhatsky
2016-10-24 5:58 ` Minchan Kim
2016-10-24 7:23 ` Sergey Senozhatsky
2016-09-22 6:42 ` [PATCH 3/3] zram: adjust the number of zram thread Minchan Kim
2016-10-21 6:23 ` Sergey Senozhatsky
2016-10-24 4:54 ` Minchan Kim
2016-10-24 5:29 ` Sergey Senozhatsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20161021060334.GA527@swordfish \
--to=sergey.senozhatsky.work@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=minchan@kernel.org \
--cc=sergey.senozhatsky@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.