All of lore.kernel.org
 help / color / mirror / Atom feed
From: Minchan Kim <minchan@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Nitin Gupta <ngupta@vflare.org>,
	Jerome Marchand <jmarchan@redhat.com>,
	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>,
	Ganesh Mahendran <opensource.ganesh@gmail.com>,
	Minchan Kim <minchan@kernel.org>
Subject: [PATCH v1 2/2] zram: remove init_lock in zram_make_request
Date: Wed, 28 Jan 2015 17:15:45 +0900	[thread overview]
Message-ID: <1422432945-6764-2-git-send-email-minchan@kernel.org> (raw)
In-Reply-To: <1422432945-6764-1-git-send-email-minchan@kernel.org>

Admin could reset zram during I/O operation going on so we have
used zram->init_lock as read-side lock in I/O path to prevent
sudden zram meta freeing.

However, the init_lock is really troublesome.
We can't do call zram_meta_alloc under init_lock due to lockdep splat
because zram_rw_page is one of the function under reclaim path and
hold it as read_lock while other places in process context hold it
as write_lock. So, we have used allocation out of the lock to avoid
lockdep warn but it's not good for readability and fainally, I met
another lockdep splat between init_lock and cpu_hotpulug from
kmem_cache_destroy during wokring zsmalloc compaction. :(

Yes, the ideal is to remove horrible init_lock of zram in rw path.
This patch removes it in rw path and instead, put init_done bool
variable to check initialization done with smp_[wmb|rmb] and
srcu_[un]read_lock to prevent sudden zram meta freeing
during I/O operation.

Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 drivers/block/zram/zram_drv.c | 85 +++++++++++++++++++++++++++++++------------
 drivers/block/zram/zram_drv.h |  5 +++
 2 files changed, 66 insertions(+), 24 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index a598ada817f0..b33add453027 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -32,6 +32,7 @@
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <linux/err.h>
+#include <linux/srcu.h>
 
 #include "zram_drv.h"
 
@@ -53,9 +54,31 @@ static ssize_t name##_show(struct device *d,		\
 }									\
 static DEVICE_ATTR_RO(name);
 
-static inline int init_done(struct zram *zram)
+static inline bool init_done(struct zram *zram)
 {
-	return zram->meta != NULL;
+	/*
+	 * init_done can be used without holding zram->init_lock in
+	 * read/write handler(ie, zram_make_request) but we should make sure
+	 * that zram->init_done should set up after meta initialization is
+	 * done. Look at setup_init_done.
+	 */
+	bool ret = zram->init_done;
+
+	if (ret)
+		smp_rmb(); /* pair with setup_init_done */
+
+	return ret;
+}
+
+static inline void setup_init_done(struct zram *zram, bool flag)
+{
+	/*
+	 * Store operation of struct zram fields should complete
+	 * before zram->init_done set up because zram_bvec_rw
+	 * doesn't hold an zram->init_lock.
+	 */
+	smp_wmb();
+	zram->init_done = flag;
 }
 
 static inline struct zram *dev_to_zram(struct device *dev)
@@ -326,6 +349,10 @@ static void zram_meta_free(struct zram_meta *meta)
 	kfree(meta);
 }
 
+static void rcu_zram_do_nothing(struct rcu_head *unused)
+{
+}
+
 static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
 {
 	char pool_name[8];
@@ -726,11 +753,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
 		return;
 	}
 
-	zcomp_destroy(zram->comp);
 	zram->max_comp_streams = 1;
 
-	zram_meta_free(zram->meta);
-	zram->meta = NULL;
 	/* Reset stats */
 	memset(&zram->stats, 0, sizeof(zram->stats));
 
@@ -738,8 +762,12 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
 	if (reset_capacity)
 		set_capacity(zram->disk, 0);
 
+	setup_init_done(zram, false);
+	call_srcu(&zram->srcu, &zram->rcu, rcu_zram_do_nothing);
+	synchronize_srcu(&zram->srcu);
+	zram_meta_free(zram->meta);
+	zcomp_destroy(zram->comp);
 	up_write(&zram->init_lock);
-
 	/*
 	 * Revalidate disk out of the init_lock to avoid lockdep splat.
 	 * It's okay because disk's capacity is protected by init_lock
@@ -762,10 +790,19 @@ static ssize_t disksize_store(struct device *dev,
 	if (!disksize)
 		return -EINVAL;
 
+	down_write(&zram->init_lock);
+	if (init_done(zram)) {
+		pr_info("Cannot change disksize for initialized device\n");
+		up_write(&zram->init_lock);
+		return -EBUSY;
+	}
+
 	disksize = PAGE_ALIGN(disksize);
 	meta = zram_meta_alloc(zram->disk->first_minor, disksize);
-	if (!meta)
+	if (!meta) {
+		up_write(&zram->init_lock);
 		return -ENOMEM;
+	}
 
 	comp = zcomp_create(zram->compressor, zram->max_comp_streams);
 	if (IS_ERR(comp)) {
@@ -775,17 +812,11 @@ static ssize_t disksize_store(struct device *dev,
 		goto out_free_meta;
 	}
 
-	down_write(&zram->init_lock);
-	if (init_done(zram)) {
-		pr_info("Cannot change disksize for initialized device\n");
-		err = -EBUSY;
-		goto out_destroy_comp;
-	}
-
 	zram->meta = meta;
 	zram->comp = comp;
 	zram->disksize = disksize;
 	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+	setup_init_done(zram, true);
 	up_write(&zram->init_lock);
 
 	/*
@@ -797,10 +828,8 @@ static ssize_t disksize_store(struct device *dev,
 
 	return len;
 
-out_destroy_comp:
-	up_write(&zram->init_lock);
-	zcomp_destroy(comp);
 out_free_meta:
+	up_write(&zram->init_lock);
 	zram_meta_free(meta);
 	return err;
 }
@@ -905,9 +934,10 @@ out:
  */
 static void zram_make_request(struct request_queue *queue, struct bio *bio)
 {
+	int idx;
 	struct zram *zram = queue->queuedata;
 
-	down_read(&zram->init_lock);
+	idx = srcu_read_lock(&zram->srcu);
 	if (unlikely(!init_done(zram)))
 		goto error;
 
@@ -918,12 +948,12 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
 	}
 
 	__zram_make_request(zram, bio);
-	up_read(&zram->init_lock);
+	srcu_read_unlock(&zram->srcu, idx);
 
 	return;
 
 error:
-	up_read(&zram->init_lock);
+	srcu_read_unlock(&zram->srcu, idx);
 	bio_io_error(bio);
 }
 
@@ -945,18 +975,20 @@ static void zram_slot_free_notify(struct block_device *bdev,
 static int zram_rw_page(struct block_device *bdev, sector_t sector,
 		       struct page *page, int rw)
 {
-	int offset, err;
+	int offset, err, idx;
 	u32 index;
 	struct zram *zram;
 	struct bio_vec bv;
 
 	zram = bdev->bd_disk->private_data;
+	idx = srcu_read_lock(&zram->srcu);
+
 	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
 		atomic64_inc(&zram->stats.invalid_io);
+		srcu_read_unlock(&zram->srcu, idx);
 		return -EINVAL;
 	}
 
-	down_read(&zram->init_lock);
 	if (unlikely(!init_done(zram))) {
 		err = -EIO;
 		goto out_unlock;
@@ -971,7 +1003,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
 
 	err = zram_bvec_rw(zram, &bv, index, offset, rw);
 out_unlock:
-	up_read(&zram->init_lock);
+	srcu_read_unlock(&zram->srcu, idx);
 	/*
 	 * If I/O fails, just return error(ie, non-zero) without
 	 * calling page_endio.
@@ -1041,6 +1073,11 @@ static int create_device(struct zram *zram, int device_id)
 
 	init_rwsem(&zram->init_lock);
 
+	if (init_srcu_struct(&zram->srcu)) {
+		pr_err("Error initialize srcu for device %d\n", device_id);
+		goto out;
+	}
+
 	zram->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!zram->queue) {
 		pr_err("Error allocating disk queue for device %d\n",
@@ -1125,8 +1162,8 @@ static void destroy_device(struct zram *zram)
 
 	del_gendisk(zram->disk);
 	put_disk(zram->disk);
-
 	blk_cleanup_queue(zram->queue);
+	cleanup_srcu_struct(&zram->srcu);
 }
 
 static int __init zram_init(void)
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index e492f6bf11f1..2042c310aea8 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -105,8 +105,13 @@ struct zram {
 	struct gendisk *disk;
 	struct zcomp *comp;
 
+	struct srcu_struct srcu;
+	struct rcu_head rcu;
+
 	/* Prevent concurrent execution of device init, reset and R/W request */
 	struct rw_semaphore init_lock;
+	bool init_done;
+
 	/*
 	 * This is the limit on amount of *uncompressed* worth of data
 	 * we can store in a disk.
-- 
1.9.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2015-01-28  8:15 UTC|newest]

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-28  8:15 [PATCH 1/2] zram: free meta table in zram_meta_free Minchan Kim
2015-01-28  8:15 ` Minchan Kim [this message]
2015-01-28 14:56   ` [PATCH v1 2/2] zram: remove init_lock in zram_make_request Sergey Senozhatsky
2015-01-28 14:56     ` Sergey Senozhatsky
2015-01-28 15:04     ` Sergey Senozhatsky
2015-01-28 15:04       ` Sergey Senozhatsky
2015-01-28 23:33     ` Minchan Kim
2015-01-28 23:33       ` Minchan Kim
2015-01-29  1:57       ` Sergey Senozhatsky
2015-01-29  2:01         ` Minchan Kim
2015-01-29  2:01           ` Minchan Kim
2015-01-29  2:22           ` Sergey Senozhatsky
2015-01-29  2:22             ` Sergey Senozhatsky
2015-01-29  5:28             ` Minchan Kim
2015-01-29  5:28               ` Minchan Kim
2015-01-29  6:06               ` Sergey Senozhatsky
2015-01-29  6:06                 ` Sergey Senozhatsky
2015-01-29  6:35                 ` Minchan Kim
2015-01-29  6:35                   ` Minchan Kim
2015-01-29  7:08                   ` Sergey Senozhatsky
2015-01-29  7:08                     ` Sergey Senozhatsky
2015-01-30 14:41                     ` Minchan Kim
2015-01-30 14:41                       ` Minchan Kim
2015-01-31 11:31                       ` Sergey Senozhatsky
2015-01-31 11:31                         ` Sergey Senozhatsky
2015-02-01 14:50                       ` Sergey Senozhatsky
2015-02-01 14:50                         ` Sergey Senozhatsky
2015-02-01 15:04                         ` Sergey Senozhatsky
2015-02-01 15:04                           ` Sergey Senozhatsky
2015-02-02  1:43                           ` Minchan Kim
2015-02-02  1:43                             ` Minchan Kim
2015-02-02  1:59                             ` Sergey Senozhatsky
2015-02-02  1:59                               ` Sergey Senozhatsky
2015-02-02  2:45                               ` Minchan Kim
2015-02-02  2:45                                 ` Minchan Kim
2015-02-02  3:47                                 ` Sergey Senozhatsky
2015-02-02  3:47                                   ` Sergey Senozhatsky
2015-02-02  1:30                         ` Minchan Kim
2015-02-02  1:30                           ` Minchan Kim
2015-02-02  1:48                           ` Sergey Senozhatsky
2015-02-02  1:48                             ` Sergey Senozhatsky
2015-02-02  2:44                             ` Minchan Kim
2015-02-02  2:44                               ` Minchan Kim
2015-02-02  4:01                               ` Sergey Senozhatsky
2015-02-02  4:01                                 ` Sergey Senozhatsky
2015-02-02  4:28                                 ` Minchan Kim
2015-02-02  4:28                                   ` Minchan Kim
2015-02-02  5:09                                   ` Sergey Senozhatsky
2015-02-02  5:09                                     ` Sergey Senozhatsky
2015-02-02  5:18                                     ` Minchan Kim
2015-02-02  5:18                                       ` Minchan Kim
2015-02-02  5:28                                       ` Sergey Senozhatsky
2015-02-02  5:28                                         ` Sergey Senozhatsky
2015-02-02  5:10                                   ` Minchan Kim
2015-02-02  5:10                                     ` Minchan Kim
2015-01-30  0:20                   ` Sergey Senozhatsky
2015-01-29 13:48   ` Ganesh Mahendran
2015-01-29 13:48     ` Ganesh Mahendran
2015-01-29 15:12     ` Sergey Senozhatsky
2015-01-29 15:12       ` Sergey Senozhatsky
2015-01-30  7:52       ` Ganesh Mahendran
2015-01-30  7:52         ` Ganesh Mahendran
2015-01-30  8:08         ` Sergey Senozhatsky
2015-01-30  8:08           ` Sergey Senozhatsky
2015-01-31  8:50           ` Ganesh Mahendran
2015-01-31  8:50             ` Ganesh Mahendran
2015-01-31 11:07             ` Sergey Senozhatsky
2015-01-31 11:07               ` Sergey Senozhatsky
2015-01-31 12:59               ` Ganesh Mahendran
2015-01-31 12:59                 ` Ganesh Mahendran
2015-01-28 14:19 ` [PATCH 1/2] zram: free meta table in zram_meta_free Sergey Senozhatsky
2015-01-28 14:19   ` Sergey Senozhatsky
2015-01-28 23:17   ` Minchan Kim
2015-01-28 23:17     ` Minchan Kim
2015-01-29  1:49     ` Ganesh Mahendran
2015-01-29  1:49       ` Ganesh Mahendran
  -- strict thread matches above, loose matches on Subject: below --
2015-02-02  3:41 [PATCH v1 2/2] zram: remove init_lock in zram_make_request Minchan Kim
2015-02-02  3:41 ` Minchan Kim
2015-02-02  5:59 ` Sergey Senozhatsky
2015-02-02  5:59   ` Sergey Senozhatsky
2015-02-02  6:18   ` Minchan Kim
2015-02-02  6:18     ` Minchan Kim
2015-02-02  7:06     ` Sergey Senozhatsky
2015-02-02  7:06       ` Sergey Senozhatsky
2015-02-03  1:54       ` Sergey Senozhatsky
2015-02-03  1:54         ` Sergey Senozhatsky
2015-02-03  3:02         ` Minchan Kim
2015-02-03  3:02           ` Minchan Kim
2015-02-03  3:56           ` Sergey Senozhatsky
2015-02-03  3:56             ` Sergey Senozhatsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1422432945-6764-2-git-send-email-minchan@kernel.org \
    --to=minchan@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=jmarchan@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ngupta@vflare.org \
    --cc=opensource.ganesh@gmail.com \
    --cc=sergey.senozhatsky@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.