diff for duplicates of <20150126160007.GC528@blaptop> diff --git a/a/1.txt b/N1/1.txt index 0434ec5..1474ac7 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -183,3 +183,261 @@ How about this? It's based on Ganesh's patch. https://lkml.org/lkml/2015/1/24/50 + +>From afda9fd2f6c40dd0745d8a6babe78c5cbdceddf5 Mon Sep 17 00:00:00 2001 +From: Minchan Kim <minchan@kernel.org> +Date: Mon, 26 Jan 2015 14:34:10 +0900 +Subject: [RFC] zram: remove init_lock in zram_make_request + +Admin could reset zram during I/O operation going on so we have +used zram->init_lock as read-side lock in I/O path to prevent +sudden zram meta freeing. + +However, the init_lock is really troublesome. +We can't do call zram_meta_alloc under init_lock due to lockdep splat +because zram_rw_page is one of the function under reclaim path and +hold it as read_lock while other places in process context hold it +as write_lock. So, we have used allocation out of the lock to avoid +lockdep warn but it's not good for readability and fainally, I met +another lockdep splat between init_lock and cpu_hotpulug from +kmem_cache_destroy during wokring zsmalloc compaction. :( + +Yes, the ideal is to remove horrible init_lock of zram in rw path. +This patch removes it in rw path and instead, put init_done bool +variable to check initialization done with smp_[wmb|rmb] and +srcu_[un]read_lock to prevent sudden zram meta freeing +during I/O operation. + +Signed-off-by: Minchan Kim <minchan@kernel.org> +--- + drivers/block/zram/zram_drv.c | 76 +++++++++++++++++++++++++++++-------------- + drivers/block/zram/zram_drv.h | 5 +++ + 2 files changed, 57 insertions(+), 24 deletions(-) + +diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c +index a598ada817f0..e06ff975f997 100644 +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -32,6 +32,7 @@ + #include <linux/string.h> + #include <linux/vmalloc.h> + #include <linux/err.h> ++#include <linux/srcu.h> + + #include "zram_drv.h" + +@@ -53,9 +54,16 @@ static ssize_t name##_show(struct device *d, \ + } \ + static DEVICE_ATTR_RO(name); + +-static inline int init_done(struct zram *zram) ++static inline bool init_done(struct zram *zram) + { +- return zram->meta != NULL; ++ /* ++ * init_done can be used without holding zram->init_lock in ++ * read/write handler(ie, zram_make_request) but we should make sure ++ * that zram->init_done should set up after meta initialization is ++ * done. Look at disksize_store. ++ */ ++ smp_rmb(); ++ return zram->init_done; + } + + static inline struct zram *dev_to_zram(struct device *dev) +@@ -326,6 +334,10 @@ static void zram_meta_free(struct zram_meta *meta) + kfree(meta); + } + ++static void rcu_zram_do_nothing(struct rcu_head *unused) ++{ ++} ++ + static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize) + { + char pool_name[8]; +@@ -726,11 +738,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) + return; + } + +- zcomp_destroy(zram->comp); + zram->max_comp_streams = 1; + +- zram_meta_free(zram->meta); +- zram->meta = NULL; + /* Reset stats */ + memset(&zram->stats, 0, sizeof(zram->stats)); + +@@ -738,8 +747,12 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) + if (reset_capacity) + set_capacity(zram->disk, 0); + ++ zram->init_done = false; ++ call_srcu(&zram->srcu, &zram->rcu, rcu_zram_do_nothing); ++ synchronize_srcu(&zram->srcu); ++ zram_meta_free(zram->meta); ++ zcomp_destroy(zram->comp); + up_write(&zram->init_lock); +- + /* + * Revalidate disk out of the init_lock to avoid lockdep splat. + * It's okay because disk's capacity is protected by init_lock +@@ -762,10 +775,19 @@ static ssize_t disksize_store(struct device *dev, + if (!disksize) + return -EINVAL; + ++ down_write(&zram->init_lock); ++ if (init_done(zram)) { ++ pr_info("Cannot change disksize for initialized device\n"); ++ up_write(&zram->init_lock); ++ return -EBUSY; ++ } ++ + disksize = PAGE_ALIGN(disksize); + meta = zram_meta_alloc(zram->disk->first_minor, disksize); +- if (!meta) ++ if (!meta) { ++ up_write(&zram->init_lock); + return -ENOMEM; ++ } + + comp = zcomp_create(zram->compressor, zram->max_comp_streams); + if (IS_ERR(comp)) { +@@ -775,17 +797,17 @@ static ssize_t disksize_store(struct device *dev, + goto out_free_meta; + } + +- down_write(&zram->init_lock); +- if (init_done(zram)) { +- pr_info("Cannot change disksize for initialized device\n"); +- err = -EBUSY; +- goto out_destroy_comp; +- } +- + zram->meta = meta; + zram->comp = comp; + zram->disksize = disksize; + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); ++ /* ++ * Store operation of struct zram fields should complete ++ * before init_done set up because zram_bvec_rw doesn't ++ * hold an zram->init_lock. ++ */ ++ smp_wmb(); ++ zram->init_done = true; + up_write(&zram->init_lock); + + /* +@@ -797,10 +819,8 @@ static ssize_t disksize_store(struct device *dev, + + return len; + +-out_destroy_comp: +- up_write(&zram->init_lock); +- zcomp_destroy(comp); + out_free_meta: ++ up_write(&zram->init_lock); + zram_meta_free(meta); + return err; + } +@@ -905,9 +925,10 @@ out: + */ + static void zram_make_request(struct request_queue *queue, struct bio *bio) + { ++ int idx; + struct zram *zram = queue->queuedata; + +- down_read(&zram->init_lock); ++ idx = srcu_read_lock(&zram->srcu); + if (unlikely(!init_done(zram))) + goto error; + +@@ -918,12 +939,12 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) + } + + __zram_make_request(zram, bio); +- up_read(&zram->init_lock); ++ srcu_read_unlock(&zram->srcu, idx); + + return; + + error: +- up_read(&zram->init_lock); ++ srcu_read_unlock(&zram->srcu, idx); + bio_io_error(bio); + } + +@@ -945,18 +966,20 @@ static void zram_slot_free_notify(struct block_device *bdev, + static int zram_rw_page(struct block_device *bdev, sector_t sector, + struct page *page, int rw) + { +- int offset, err; ++ int offset, err, idx; + u32 index; + struct zram *zram; + struct bio_vec bv; + + zram = bdev->bd_disk->private_data; ++ idx = srcu_read_lock(&zram->srcu); ++ + if (!valid_io_request(zram, sector, PAGE_SIZE)) { + atomic64_inc(&zram->stats.invalid_io); ++ srcu_read_unlock(&zram->srcu, idx); + return -EINVAL; + } + +- down_read(&zram->init_lock); + if (unlikely(!init_done(zram))) { + err = -EIO; + goto out_unlock; +@@ -971,7 +994,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, + + err = zram_bvec_rw(zram, &bv, index, offset, rw); + out_unlock: +- up_read(&zram->init_lock); ++ srcu_read_unlock(&zram->srcu, idx); + /* + * If I/O fails, just return error(ie, non-zero) without + * calling page_endio. +@@ -1041,6 +1064,11 @@ static int create_device(struct zram *zram, int device_id) + + init_rwsem(&zram->init_lock); + ++ if (init_srcu_struct(&zram->srcu)) { ++ pr_err("Error initialize srcu for device %d\n", device_id); ++ goto out; ++ } ++ + zram->queue = blk_alloc_queue(GFP_KERNEL); + if (!zram->queue) { + pr_err("Error allocating disk queue for device %d\n", +@@ -1125,8 +1153,8 @@ static void destroy_device(struct zram *zram) + + del_gendisk(zram->disk); + put_disk(zram->disk); +- + blk_cleanup_queue(zram->queue); ++ cleanup_srcu_struct(&zram->srcu); + } + + static int __init zram_init(void) +diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h +index e492f6bf11f1..2042c310aea8 100644 +--- a/drivers/block/zram/zram_drv.h ++++ b/drivers/block/zram/zram_drv.h +@@ -105,8 +105,13 @@ struct zram { + struct gendisk *disk; + struct zcomp *comp; + ++ struct srcu_struct srcu; ++ struct rcu_head rcu; ++ + /* Prevent concurrent execution of device init, reset and R/W request */ + struct rw_semaphore init_lock; ++ bool init_done; ++ + /* + * This is the limit on amount of *uncompressed* worth of data + * we can store in a disk. +-- +1.9.1 diff --git a/a/content_digest b/N1/content_digest index 6bac028..a172b59 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -200,6 +200,264 @@ "How about this?\n" "\n" "It's based on Ganesh's patch.\n" - https://lkml.org/lkml/2015/1/24/50 + "https://lkml.org/lkml/2015/1/24/50\n" + "\n" + ">From afda9fd2f6c40dd0745d8a6babe78c5cbdceddf5 Mon Sep 17 00:00:00 2001\n" + "From: Minchan Kim <minchan@kernel.org>\n" + "Date: Mon, 26 Jan 2015 14:34:10 +0900\n" + "Subject: [RFC] zram: remove init_lock in zram_make_request\n" + "\n" + "Admin could reset zram during I/O operation going on so we have\n" + "used zram->init_lock as read-side lock in I/O path to prevent\n" + "sudden zram meta freeing.\n" + "\n" + "However, the init_lock is really troublesome.\n" + "We can't do call zram_meta_alloc under init_lock due to lockdep splat\n" + "because zram_rw_page is one of the function under reclaim path and\n" + "hold it as read_lock while other places in process context hold it\n" + "as write_lock. So, we have used allocation out of the lock to avoid\n" + "lockdep warn but it's not good for readability and fainally, I met\n" + "another lockdep splat between init_lock and cpu_hotpulug from\n" + "kmem_cache_destroy during wokring zsmalloc compaction. :(\n" + "\n" + "Yes, the ideal is to remove horrible init_lock of zram in rw path.\n" + "This patch removes it in rw path and instead, put init_done bool\n" + "variable to check initialization done with smp_[wmb|rmb] and\n" + "srcu_[un]read_lock to prevent sudden zram meta freeing\n" + "during I/O operation.\n" + "\n" + "Signed-off-by: Minchan Kim <minchan@kernel.org>\n" + "---\n" + " drivers/block/zram/zram_drv.c | 76 +++++++++++++++++++++++++++++--------------\n" + " drivers/block/zram/zram_drv.h | 5 +++\n" + " 2 files changed, 57 insertions(+), 24 deletions(-)\n" + "\n" + "diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c\n" + "index a598ada817f0..e06ff975f997 100644\n" + "--- a/drivers/block/zram/zram_drv.c\n" + "+++ b/drivers/block/zram/zram_drv.c\n" + "@@ -32,6 +32,7 @@\n" + " #include <linux/string.h>\n" + " #include <linux/vmalloc.h>\n" + " #include <linux/err.h>\n" + "+#include <linux/srcu.h>\n" + " \n" + " #include \"zram_drv.h\"\n" + " \n" + "@@ -53,9 +54,16 @@ static ssize_t name##_show(struct device *d,\t\t\\\n" + " }\t\t\t\t\t\t\t\t\t\\\n" + " static DEVICE_ATTR_RO(name);\n" + " \n" + "-static inline int init_done(struct zram *zram)\n" + "+static inline bool init_done(struct zram *zram)\n" + " {\n" + "-\treturn zram->meta != NULL;\n" + "+\t/*\n" + "+\t * init_done can be used without holding zram->init_lock in\n" + "+\t * read/write handler(ie, zram_make_request) but we should make sure\n" + "+\t * that zram->init_done should set up after meta initialization is\n" + "+\t * done. Look at disksize_store.\n" + "+\t */\n" + "+\tsmp_rmb();\n" + "+\treturn zram->init_done;\n" + " }\n" + " \n" + " static inline struct zram *dev_to_zram(struct device *dev)\n" + "@@ -326,6 +334,10 @@ static void zram_meta_free(struct zram_meta *meta)\n" + " \tkfree(meta);\n" + " }\n" + " \n" + "+static void rcu_zram_do_nothing(struct rcu_head *unused)\n" + "+{\n" + "+}\n" + "+\n" + " static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)\n" + " {\n" + " \tchar pool_name[8];\n" + "@@ -726,11 +738,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)\n" + " \t\treturn;\n" + " \t}\n" + " \n" + "-\tzcomp_destroy(zram->comp);\n" + " \tzram->max_comp_streams = 1;\n" + " \n" + "-\tzram_meta_free(zram->meta);\n" + "-\tzram->meta = NULL;\n" + " \t/* Reset stats */\n" + " \tmemset(&zram->stats, 0, sizeof(zram->stats));\n" + " \n" + "@@ -738,8 +747,12 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)\n" + " \tif (reset_capacity)\n" + " \t\tset_capacity(zram->disk, 0);\n" + " \n" + "+\tzram->init_done = false;\n" + "+\tcall_srcu(&zram->srcu, &zram->rcu, rcu_zram_do_nothing);\n" + "+\tsynchronize_srcu(&zram->srcu);\n" + "+\tzram_meta_free(zram->meta);\n" + "+\tzcomp_destroy(zram->comp);\n" + " \tup_write(&zram->init_lock);\n" + "-\n" + " \t/*\n" + " \t * Revalidate disk out of the init_lock to avoid lockdep splat.\n" + " \t * It's okay because disk's capacity is protected by init_lock\n" + "@@ -762,10 +775,19 @@ static ssize_t disksize_store(struct device *dev,\n" + " \tif (!disksize)\n" + " \t\treturn -EINVAL;\n" + " \n" + "+\tdown_write(&zram->init_lock);\n" + "+\tif (init_done(zram)) {\n" + "+\t\tpr_info(\"Cannot change disksize for initialized device\\n\");\n" + "+\t\tup_write(&zram->init_lock);\n" + "+\t\treturn -EBUSY;\n" + "+\t}\n" + "+\n" + " \tdisksize = PAGE_ALIGN(disksize);\n" + " \tmeta = zram_meta_alloc(zram->disk->first_minor, disksize);\n" + "-\tif (!meta)\n" + "+\tif (!meta) {\n" + "+\t\tup_write(&zram->init_lock);\n" + " \t\treturn -ENOMEM;\n" + "+\t}\n" + " \n" + " \tcomp = zcomp_create(zram->compressor, zram->max_comp_streams);\n" + " \tif (IS_ERR(comp)) {\n" + "@@ -775,17 +797,17 @@ static ssize_t disksize_store(struct device *dev,\n" + " \t\tgoto out_free_meta;\n" + " \t}\n" + " \n" + "-\tdown_write(&zram->init_lock);\n" + "-\tif (init_done(zram)) {\n" + "-\t\tpr_info(\"Cannot change disksize for initialized device\\n\");\n" + "-\t\terr = -EBUSY;\n" + "-\t\tgoto out_destroy_comp;\n" + "-\t}\n" + "-\n" + " \tzram->meta = meta;\n" + " \tzram->comp = comp;\n" + " \tzram->disksize = disksize;\n" + " \tset_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);\n" + "+\t/*\n" + "+\t * Store operation of struct zram fields should complete\n" + "+\t * before init_done set up because zram_bvec_rw doesn't\n" + "+\t * hold an zram->init_lock.\n" + "+\t */\n" + "+\tsmp_wmb();\n" + "+\tzram->init_done = true;\n" + " \tup_write(&zram->init_lock);\n" + " \n" + " \t/*\n" + "@@ -797,10 +819,8 @@ static ssize_t disksize_store(struct device *dev,\n" + " \n" + " \treturn len;\n" + " \n" + "-out_destroy_comp:\n" + "-\tup_write(&zram->init_lock);\n" + "-\tzcomp_destroy(comp);\n" + " out_free_meta:\n" + "+\tup_write(&zram->init_lock);\n" + " \tzram_meta_free(meta);\n" + " \treturn err;\n" + " }\n" + "@@ -905,9 +925,10 @@ out:\n" + " */\n" + " static void zram_make_request(struct request_queue *queue, struct bio *bio)\n" + " {\n" + "+\tint idx;\n" + " \tstruct zram *zram = queue->queuedata;\n" + " \n" + "-\tdown_read(&zram->init_lock);\n" + "+\tidx = srcu_read_lock(&zram->srcu);\n" + " \tif (unlikely(!init_done(zram)))\n" + " \t\tgoto error;\n" + " \n" + "@@ -918,12 +939,12 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)\n" + " \t}\n" + " \n" + " \t__zram_make_request(zram, bio);\n" + "-\tup_read(&zram->init_lock);\n" + "+\tsrcu_read_unlock(&zram->srcu, idx);\n" + " \n" + " \treturn;\n" + " \n" + " error:\n" + "-\tup_read(&zram->init_lock);\n" + "+\tsrcu_read_unlock(&zram->srcu, idx);\n" + " \tbio_io_error(bio);\n" + " }\n" + " \n" + "@@ -945,18 +966,20 @@ static void zram_slot_free_notify(struct block_device *bdev,\n" + " static int zram_rw_page(struct block_device *bdev, sector_t sector,\n" + " \t\t struct page *page, int rw)\n" + " {\n" + "-\tint offset, err;\n" + "+\tint offset, err, idx;\n" + " \tu32 index;\n" + " \tstruct zram *zram;\n" + " \tstruct bio_vec bv;\n" + " \n" + " \tzram = bdev->bd_disk->private_data;\n" + "+\tidx = srcu_read_lock(&zram->srcu);\n" + "+\n" + " \tif (!valid_io_request(zram, sector, PAGE_SIZE)) {\n" + " \t\tatomic64_inc(&zram->stats.invalid_io);\n" + "+\t\tsrcu_read_unlock(&zram->srcu, idx);\n" + " \t\treturn -EINVAL;\n" + " \t}\n" + " \n" + "-\tdown_read(&zram->init_lock);\n" + " \tif (unlikely(!init_done(zram))) {\n" + " \t\terr = -EIO;\n" + " \t\tgoto out_unlock;\n" + "@@ -971,7 +994,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,\n" + " \n" + " \terr = zram_bvec_rw(zram, &bv, index, offset, rw);\n" + " out_unlock:\n" + "-\tup_read(&zram->init_lock);\n" + "+\tsrcu_read_unlock(&zram->srcu, idx);\n" + " \t/*\n" + " \t * If I/O fails, just return error(ie, non-zero) without\n" + " \t * calling page_endio.\n" + "@@ -1041,6 +1064,11 @@ static int create_device(struct zram *zram, int device_id)\n" + " \n" + " \tinit_rwsem(&zram->init_lock);\n" + " \n" + "+\tif (init_srcu_struct(&zram->srcu)) {\n" + "+\t\tpr_err(\"Error initialize srcu for device %d\\n\", device_id);\n" + "+\t\tgoto out;\n" + "+\t}\n" + "+\n" + " \tzram->queue = blk_alloc_queue(GFP_KERNEL);\n" + " \tif (!zram->queue) {\n" + " \t\tpr_err(\"Error allocating disk queue for device %d\\n\",\n" + "@@ -1125,8 +1153,8 @@ static void destroy_device(struct zram *zram)\n" + " \n" + " \tdel_gendisk(zram->disk);\n" + " \tput_disk(zram->disk);\n" + "-\n" + " \tblk_cleanup_queue(zram->queue);\n" + "+\tcleanup_srcu_struct(&zram->srcu);\n" + " }\n" + " \n" + " static int __init zram_init(void)\n" + "diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h\n" + "index e492f6bf11f1..2042c310aea8 100644\n" + "--- a/drivers/block/zram/zram_drv.h\n" + "+++ b/drivers/block/zram/zram_drv.h\n" + "@@ -105,8 +105,13 @@ struct zram {\n" + " \tstruct gendisk *disk;\n" + " \tstruct zcomp *comp;\n" + " \n" + "+\tstruct srcu_struct srcu;\n" + "+\tstruct rcu_head rcu;\n" + "+\n" + " \t/* Prevent concurrent execution of device init, reset and R/W request */\n" + " \tstruct rw_semaphore init_lock;\n" + "+\tbool init_done;\n" + "+\n" + " \t/*\n" + " \t * This is the limit on amount of *uncompressed* worth of data\n" + " \t * we can store in a disk.\n" + "-- \n" + 1.9.1 -ea4d9598e2b79e2ee20429d8fe9ad32f5a95772c1f55d311987b03f7e0cc4330 +cf21096ff23bc4cd0588c65947e33ff24b9443759d5958618bb9a7cf915edf34
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.