All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20150126160007.GC528@blaptop>

diff --git a/a/1.txt b/N1/1.txt
index 0434ec5..1474ac7 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -183,3 +183,261 @@ How about this?
 
 It's based on Ganesh's patch.
 https://lkml.org/lkml/2015/1/24/50
+
+>From afda9fd2f6c40dd0745d8a6babe78c5cbdceddf5 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Mon, 26 Jan 2015 14:34:10 +0900
+Subject: [RFC] zram: remove init_lock in zram_make_request
+
+Admin could reset zram during I/O operation going on so we have
+used zram->init_lock as read-side lock in I/O path to prevent
+sudden zram meta freeing.
+
+However, the init_lock is really troublesome.
+We can't do call zram_meta_alloc under init_lock due to lockdep splat
+because zram_rw_page is one of the function under reclaim path and
+hold it as read_lock while other places in process context hold it
+as write_lock. So, we have used allocation out of the lock to avoid
+lockdep warn but it's not good for readability and fainally, I met
+another lockdep splat between init_lock and cpu_hotpulug from
+kmem_cache_destroy during wokring zsmalloc compaction. :(
+
+Yes, the ideal is to remove horrible init_lock of zram in rw path.
+This patch removes it in rw path and instead, put init_done bool
+variable to check initialization done with smp_[wmb|rmb] and
+srcu_[un]read_lock to prevent sudden zram meta freeing
+during I/O operation.
+
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+---
+ drivers/block/zram/zram_drv.c | 76 +++++++++++++++++++++++++++++--------------
+ drivers/block/zram/zram_drv.h |  5 +++
+ 2 files changed, 57 insertions(+), 24 deletions(-)
+
+diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
+index a598ada817f0..e06ff975f997 100644
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -32,6 +32,7 @@
+ #include <linux/string.h>
+ #include <linux/vmalloc.h>
+ #include <linux/err.h>
++#include <linux/srcu.h>
+ 
+ #include "zram_drv.h"
+ 
+@@ -53,9 +54,16 @@ static ssize_t name##_show(struct device *d,		\
+ }									\
+ static DEVICE_ATTR_RO(name);
+ 
+-static inline int init_done(struct zram *zram)
++static inline bool init_done(struct zram *zram)
+ {
+-	return zram->meta != NULL;
++	/*
++	 * init_done can be used without holding zram->init_lock in
++	 * read/write handler(ie, zram_make_request) but we should make sure
++	 * that zram->init_done should set up after meta initialization is
++	 * done. Look at disksize_store.
++	 */
++	smp_rmb();
++	return zram->init_done;
+ }
+ 
+ static inline struct zram *dev_to_zram(struct device *dev)
+@@ -326,6 +334,10 @@ static void zram_meta_free(struct zram_meta *meta)
+ 	kfree(meta);
+ }
+ 
++static void rcu_zram_do_nothing(struct rcu_head *unused)
++{
++}
++
+ static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
+ {
+ 	char pool_name[8];
+@@ -726,11 +738,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
+ 		return;
+ 	}
+ 
+-	zcomp_destroy(zram->comp);
+ 	zram->max_comp_streams = 1;
+ 
+-	zram_meta_free(zram->meta);
+-	zram->meta = NULL;
+ 	/* Reset stats */
+ 	memset(&zram->stats, 0, sizeof(zram->stats));
+ 
+@@ -738,8 +747,12 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
+ 	if (reset_capacity)
+ 		set_capacity(zram->disk, 0);
+ 
++	zram->init_done = false;
++	call_srcu(&zram->srcu, &zram->rcu, rcu_zram_do_nothing);
++	synchronize_srcu(&zram->srcu);
++	zram_meta_free(zram->meta);
++	zcomp_destroy(zram->comp);
+ 	up_write(&zram->init_lock);
+-
+ 	/*
+ 	 * Revalidate disk out of the init_lock to avoid lockdep splat.
+ 	 * It's okay because disk's capacity is protected by init_lock
+@@ -762,10 +775,19 @@ static ssize_t disksize_store(struct device *dev,
+ 	if (!disksize)
+ 		return -EINVAL;
+ 
++	down_write(&zram->init_lock);
++	if (init_done(zram)) {
++		pr_info("Cannot change disksize for initialized device\n");
++		up_write(&zram->init_lock);
++		return -EBUSY;
++	}
++
+ 	disksize = PAGE_ALIGN(disksize);
+ 	meta = zram_meta_alloc(zram->disk->first_minor, disksize);
+-	if (!meta)
++	if (!meta) {
++		up_write(&zram->init_lock);
+ 		return -ENOMEM;
++	}
+ 
+ 	comp = zcomp_create(zram->compressor, zram->max_comp_streams);
+ 	if (IS_ERR(comp)) {
+@@ -775,17 +797,17 @@ static ssize_t disksize_store(struct device *dev,
+ 		goto out_free_meta;
+ 	}
+ 
+-	down_write(&zram->init_lock);
+-	if (init_done(zram)) {
+-		pr_info("Cannot change disksize for initialized device\n");
+-		err = -EBUSY;
+-		goto out_destroy_comp;
+-	}
+-
+ 	zram->meta = meta;
+ 	zram->comp = comp;
+ 	zram->disksize = disksize;
+ 	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
++	/*
++	 * Store operation of struct zram fields should complete
++	 * before init_done set up because zram_bvec_rw doesn't
++	 * hold an zram->init_lock.
++	 */
++	smp_wmb();
++	zram->init_done = true;
+ 	up_write(&zram->init_lock);
+ 
+ 	/*
+@@ -797,10 +819,8 @@ static ssize_t disksize_store(struct device *dev,
+ 
+ 	return len;
+ 
+-out_destroy_comp:
+-	up_write(&zram->init_lock);
+-	zcomp_destroy(comp);
+ out_free_meta:
++	up_write(&zram->init_lock);
+ 	zram_meta_free(meta);
+ 	return err;
+ }
+@@ -905,9 +925,10 @@ out:
+  */
+ static void zram_make_request(struct request_queue *queue, struct bio *bio)
+ {
++	int idx;
+ 	struct zram *zram = queue->queuedata;
+ 
+-	down_read(&zram->init_lock);
++	idx = srcu_read_lock(&zram->srcu);
+ 	if (unlikely(!init_done(zram)))
+ 		goto error;
+ 
+@@ -918,12 +939,12 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
+ 	}
+ 
+ 	__zram_make_request(zram, bio);
+-	up_read(&zram->init_lock);
++	srcu_read_unlock(&zram->srcu, idx);
+ 
+ 	return;
+ 
+ error:
+-	up_read(&zram->init_lock);
++	srcu_read_unlock(&zram->srcu, idx);
+ 	bio_io_error(bio);
+ }
+ 
+@@ -945,18 +966,20 @@ static void zram_slot_free_notify(struct block_device *bdev,
+ static int zram_rw_page(struct block_device *bdev, sector_t sector,
+ 		       struct page *page, int rw)
+ {
+-	int offset, err;
++	int offset, err, idx;
+ 	u32 index;
+ 	struct zram *zram;
+ 	struct bio_vec bv;
+ 
+ 	zram = bdev->bd_disk->private_data;
++	idx = srcu_read_lock(&zram->srcu);
++
+ 	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
+ 		atomic64_inc(&zram->stats.invalid_io);
++		srcu_read_unlock(&zram->srcu, idx);
+ 		return -EINVAL;
+ 	}
+ 
+-	down_read(&zram->init_lock);
+ 	if (unlikely(!init_done(zram))) {
+ 		err = -EIO;
+ 		goto out_unlock;
+@@ -971,7 +994,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
+ 
+ 	err = zram_bvec_rw(zram, &bv, index, offset, rw);
+ out_unlock:
+-	up_read(&zram->init_lock);
++	srcu_read_unlock(&zram->srcu, idx);
+ 	/*
+ 	 * If I/O fails, just return error(ie, non-zero) without
+ 	 * calling page_endio.
+@@ -1041,6 +1064,11 @@ static int create_device(struct zram *zram, int device_id)
+ 
+ 	init_rwsem(&zram->init_lock);
+ 
++	if (init_srcu_struct(&zram->srcu)) {
++		pr_err("Error initialize srcu for device %d\n", device_id);
++		goto out;
++	}
++
+ 	zram->queue = blk_alloc_queue(GFP_KERNEL);
+ 	if (!zram->queue) {
+ 		pr_err("Error allocating disk queue for device %d\n",
+@@ -1125,8 +1153,8 @@ static void destroy_device(struct zram *zram)
+ 
+ 	del_gendisk(zram->disk);
+ 	put_disk(zram->disk);
+-
+ 	blk_cleanup_queue(zram->queue);
++	cleanup_srcu_struct(&zram->srcu);
+ }
+ 
+ static int __init zram_init(void)
+diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
+index e492f6bf11f1..2042c310aea8 100644
+--- a/drivers/block/zram/zram_drv.h
++++ b/drivers/block/zram/zram_drv.h
+@@ -105,8 +105,13 @@ struct zram {
+ 	struct gendisk *disk;
+ 	struct zcomp *comp;
+ 
++	struct srcu_struct srcu;
++	struct rcu_head rcu;
++
+ 	/* Prevent concurrent execution of device init, reset and R/W request */
+ 	struct rw_semaphore init_lock;
++	bool init_done;
++
+ 	/*
+ 	 * This is the limit on amount of *uncompressed* worth of data
+ 	 * we can store in a disk.
+-- 
+1.9.1
diff --git a/a/content_digest b/N1/content_digest
index 6bac028..a172b59 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -200,6 +200,264 @@
  "How about this?\n"
  "\n"
  "It's based on Ganesh's patch.\n"
- https://lkml.org/lkml/2015/1/24/50
+ "https://lkml.org/lkml/2015/1/24/50\n"
+ "\n"
+ ">From afda9fd2f6c40dd0745d8a6babe78c5cbdceddf5 Mon Sep 17 00:00:00 2001\n"
+ "From: Minchan Kim <minchan@kernel.org>\n"
+ "Date: Mon, 26 Jan 2015 14:34:10 +0900\n"
+ "Subject: [RFC] zram: remove init_lock in zram_make_request\n"
+ "\n"
+ "Admin could reset zram during I/O operation going on so we have\n"
+ "used zram->init_lock as read-side lock in I/O path to prevent\n"
+ "sudden zram meta freeing.\n"
+ "\n"
+ "However, the init_lock is really troublesome.\n"
+ "We can't do call zram_meta_alloc under init_lock due to lockdep splat\n"
+ "because zram_rw_page is one of the function under reclaim path and\n"
+ "hold it as read_lock while other places in process context hold it\n"
+ "as write_lock. So, we have used allocation out of the lock to avoid\n"
+ "lockdep warn but it's not good for readability and fainally, I met\n"
+ "another lockdep splat between init_lock and cpu_hotpulug from\n"
+ "kmem_cache_destroy during wokring zsmalloc compaction. :(\n"
+ "\n"
+ "Yes, the ideal is to remove horrible init_lock of zram in rw path.\n"
+ "This patch removes it in rw path and instead, put init_done bool\n"
+ "variable to check initialization done with smp_[wmb|rmb] and\n"
+ "srcu_[un]read_lock to prevent sudden zram meta freeing\n"
+ "during I/O operation.\n"
+ "\n"
+ "Signed-off-by: Minchan Kim <minchan@kernel.org>\n"
+ "---\n"
+ " drivers/block/zram/zram_drv.c | 76 +++++++++++++++++++++++++++++--------------\n"
+ " drivers/block/zram/zram_drv.h |  5 +++\n"
+ " 2 files changed, 57 insertions(+), 24 deletions(-)\n"
+ "\n"
+ "diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c\n"
+ "index a598ada817f0..e06ff975f997 100644\n"
+ "--- a/drivers/block/zram/zram_drv.c\n"
+ "+++ b/drivers/block/zram/zram_drv.c\n"
+ "@@ -32,6 +32,7 @@\n"
+ " #include <linux/string.h>\n"
+ " #include <linux/vmalloc.h>\n"
+ " #include <linux/err.h>\n"
+ "+#include <linux/srcu.h>\n"
+ " \n"
+ " #include \"zram_drv.h\"\n"
+ " \n"
+ "@@ -53,9 +54,16 @@ static ssize_t name##_show(struct device *d,\t\t\\\n"
+ " }\t\t\t\t\t\t\t\t\t\\\n"
+ " static DEVICE_ATTR_RO(name);\n"
+ " \n"
+ "-static inline int init_done(struct zram *zram)\n"
+ "+static inline bool init_done(struct zram *zram)\n"
+ " {\n"
+ "-\treturn zram->meta != NULL;\n"
+ "+\t/*\n"
+ "+\t * init_done can be used without holding zram->init_lock in\n"
+ "+\t * read/write handler(ie, zram_make_request) but we should make sure\n"
+ "+\t * that zram->init_done should set up after meta initialization is\n"
+ "+\t * done. Look at disksize_store.\n"
+ "+\t */\n"
+ "+\tsmp_rmb();\n"
+ "+\treturn zram->init_done;\n"
+ " }\n"
+ " \n"
+ " static inline struct zram *dev_to_zram(struct device *dev)\n"
+ "@@ -326,6 +334,10 @@ static void zram_meta_free(struct zram_meta *meta)\n"
+ " \tkfree(meta);\n"
+ " }\n"
+ " \n"
+ "+static void rcu_zram_do_nothing(struct rcu_head *unused)\n"
+ "+{\n"
+ "+}\n"
+ "+\n"
+ " static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)\n"
+ " {\n"
+ " \tchar pool_name[8];\n"
+ "@@ -726,11 +738,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)\n"
+ " \t\treturn;\n"
+ " \t}\n"
+ " \n"
+ "-\tzcomp_destroy(zram->comp);\n"
+ " \tzram->max_comp_streams = 1;\n"
+ " \n"
+ "-\tzram_meta_free(zram->meta);\n"
+ "-\tzram->meta = NULL;\n"
+ " \t/* Reset stats */\n"
+ " \tmemset(&zram->stats, 0, sizeof(zram->stats));\n"
+ " \n"
+ "@@ -738,8 +747,12 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)\n"
+ " \tif (reset_capacity)\n"
+ " \t\tset_capacity(zram->disk, 0);\n"
+ " \n"
+ "+\tzram->init_done = false;\n"
+ "+\tcall_srcu(&zram->srcu, &zram->rcu, rcu_zram_do_nothing);\n"
+ "+\tsynchronize_srcu(&zram->srcu);\n"
+ "+\tzram_meta_free(zram->meta);\n"
+ "+\tzcomp_destroy(zram->comp);\n"
+ " \tup_write(&zram->init_lock);\n"
+ "-\n"
+ " \t/*\n"
+ " \t * Revalidate disk out of the init_lock to avoid lockdep splat.\n"
+ " \t * It's okay because disk's capacity is protected by init_lock\n"
+ "@@ -762,10 +775,19 @@ static ssize_t disksize_store(struct device *dev,\n"
+ " \tif (!disksize)\n"
+ " \t\treturn -EINVAL;\n"
+ " \n"
+ "+\tdown_write(&zram->init_lock);\n"
+ "+\tif (init_done(zram)) {\n"
+ "+\t\tpr_info(\"Cannot change disksize for initialized device\\n\");\n"
+ "+\t\tup_write(&zram->init_lock);\n"
+ "+\t\treturn -EBUSY;\n"
+ "+\t}\n"
+ "+\n"
+ " \tdisksize = PAGE_ALIGN(disksize);\n"
+ " \tmeta = zram_meta_alloc(zram->disk->first_minor, disksize);\n"
+ "-\tif (!meta)\n"
+ "+\tif (!meta) {\n"
+ "+\t\tup_write(&zram->init_lock);\n"
+ " \t\treturn -ENOMEM;\n"
+ "+\t}\n"
+ " \n"
+ " \tcomp = zcomp_create(zram->compressor, zram->max_comp_streams);\n"
+ " \tif (IS_ERR(comp)) {\n"
+ "@@ -775,17 +797,17 @@ static ssize_t disksize_store(struct device *dev,\n"
+ " \t\tgoto out_free_meta;\n"
+ " \t}\n"
+ " \n"
+ "-\tdown_write(&zram->init_lock);\n"
+ "-\tif (init_done(zram)) {\n"
+ "-\t\tpr_info(\"Cannot change disksize for initialized device\\n\");\n"
+ "-\t\terr = -EBUSY;\n"
+ "-\t\tgoto out_destroy_comp;\n"
+ "-\t}\n"
+ "-\n"
+ " \tzram->meta = meta;\n"
+ " \tzram->comp = comp;\n"
+ " \tzram->disksize = disksize;\n"
+ " \tset_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);\n"
+ "+\t/*\n"
+ "+\t * Store operation of struct zram fields should complete\n"
+ "+\t * before init_done set up because zram_bvec_rw doesn't\n"
+ "+\t * hold an zram->init_lock.\n"
+ "+\t */\n"
+ "+\tsmp_wmb();\n"
+ "+\tzram->init_done = true;\n"
+ " \tup_write(&zram->init_lock);\n"
+ " \n"
+ " \t/*\n"
+ "@@ -797,10 +819,8 @@ static ssize_t disksize_store(struct device *dev,\n"
+ " \n"
+ " \treturn len;\n"
+ " \n"
+ "-out_destroy_comp:\n"
+ "-\tup_write(&zram->init_lock);\n"
+ "-\tzcomp_destroy(comp);\n"
+ " out_free_meta:\n"
+ "+\tup_write(&zram->init_lock);\n"
+ " \tzram_meta_free(meta);\n"
+ " \treturn err;\n"
+ " }\n"
+ "@@ -905,9 +925,10 @@ out:\n"
+ "  */\n"
+ " static void zram_make_request(struct request_queue *queue, struct bio *bio)\n"
+ " {\n"
+ "+\tint idx;\n"
+ " \tstruct zram *zram = queue->queuedata;\n"
+ " \n"
+ "-\tdown_read(&zram->init_lock);\n"
+ "+\tidx = srcu_read_lock(&zram->srcu);\n"
+ " \tif (unlikely(!init_done(zram)))\n"
+ " \t\tgoto error;\n"
+ " \n"
+ "@@ -918,12 +939,12 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)\n"
+ " \t}\n"
+ " \n"
+ " \t__zram_make_request(zram, bio);\n"
+ "-\tup_read(&zram->init_lock);\n"
+ "+\tsrcu_read_unlock(&zram->srcu, idx);\n"
+ " \n"
+ " \treturn;\n"
+ " \n"
+ " error:\n"
+ "-\tup_read(&zram->init_lock);\n"
+ "+\tsrcu_read_unlock(&zram->srcu, idx);\n"
+ " \tbio_io_error(bio);\n"
+ " }\n"
+ " \n"
+ "@@ -945,18 +966,20 @@ static void zram_slot_free_notify(struct block_device *bdev,\n"
+ " static int zram_rw_page(struct block_device *bdev, sector_t sector,\n"
+ " \t\t       struct page *page, int rw)\n"
+ " {\n"
+ "-\tint offset, err;\n"
+ "+\tint offset, err, idx;\n"
+ " \tu32 index;\n"
+ " \tstruct zram *zram;\n"
+ " \tstruct bio_vec bv;\n"
+ " \n"
+ " \tzram = bdev->bd_disk->private_data;\n"
+ "+\tidx = srcu_read_lock(&zram->srcu);\n"
+ "+\n"
+ " \tif (!valid_io_request(zram, sector, PAGE_SIZE)) {\n"
+ " \t\tatomic64_inc(&zram->stats.invalid_io);\n"
+ "+\t\tsrcu_read_unlock(&zram->srcu, idx);\n"
+ " \t\treturn -EINVAL;\n"
+ " \t}\n"
+ " \n"
+ "-\tdown_read(&zram->init_lock);\n"
+ " \tif (unlikely(!init_done(zram))) {\n"
+ " \t\terr = -EIO;\n"
+ " \t\tgoto out_unlock;\n"
+ "@@ -971,7 +994,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,\n"
+ " \n"
+ " \terr = zram_bvec_rw(zram, &bv, index, offset, rw);\n"
+ " out_unlock:\n"
+ "-\tup_read(&zram->init_lock);\n"
+ "+\tsrcu_read_unlock(&zram->srcu, idx);\n"
+ " \t/*\n"
+ " \t * If I/O fails, just return error(ie, non-zero) without\n"
+ " \t * calling page_endio.\n"
+ "@@ -1041,6 +1064,11 @@ static int create_device(struct zram *zram, int device_id)\n"
+ " \n"
+ " \tinit_rwsem(&zram->init_lock);\n"
+ " \n"
+ "+\tif (init_srcu_struct(&zram->srcu)) {\n"
+ "+\t\tpr_err(\"Error initialize srcu for device %d\\n\", device_id);\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+\n"
+ " \tzram->queue = blk_alloc_queue(GFP_KERNEL);\n"
+ " \tif (!zram->queue) {\n"
+ " \t\tpr_err(\"Error allocating disk queue for device %d\\n\",\n"
+ "@@ -1125,8 +1153,8 @@ static void destroy_device(struct zram *zram)\n"
+ " \n"
+ " \tdel_gendisk(zram->disk);\n"
+ " \tput_disk(zram->disk);\n"
+ "-\n"
+ " \tblk_cleanup_queue(zram->queue);\n"
+ "+\tcleanup_srcu_struct(&zram->srcu);\n"
+ " }\n"
+ " \n"
+ " static int __init zram_init(void)\n"
+ "diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h\n"
+ "index e492f6bf11f1..2042c310aea8 100644\n"
+ "--- a/drivers/block/zram/zram_drv.h\n"
+ "+++ b/drivers/block/zram/zram_drv.h\n"
+ "@@ -105,8 +105,13 @@ struct zram {\n"
+ " \tstruct gendisk *disk;\n"
+ " \tstruct zcomp *comp;\n"
+ " \n"
+ "+\tstruct srcu_struct srcu;\n"
+ "+\tstruct rcu_head rcu;\n"
+ "+\n"
+ " \t/* Prevent concurrent execution of device init, reset and R/W request */\n"
+ " \tstruct rw_semaphore init_lock;\n"
+ "+\tbool init_done;\n"
+ "+\n"
+ " \t/*\n"
+ " \t * This is the limit on amount of *uncompressed* worth of data\n"
+ " \t * we can store in a disk.\n"
+ "-- \n"
+ 1.9.1
 
-ea4d9598e2b79e2ee20429d8fe9ad32f5a95772c1f55d311987b03f7e0cc4330
+cf21096ff23bc4cd0588c65947e33ff24b9443759d5958618bb9a7cf915edf34

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.