diff for duplicates of <20150130144145.GA2840@blaptop> diff --git a/a/1.txt b/N1/1.txt index 73db876..cb6ba17 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -199,3 +199,231 @@ it could make significant difference, either. Hope it addresses your concern. Thanks. + +>From e3f0965e692a3d085bb4ff25a774291e3f269550 Mon Sep 17 00:00:00 2001 +From: Minchan Kim <minchan@kernel.org> +Date: Fri, 30 Jan 2015 09:57:37 +0900 +Subject: [PATCH] zram: remove init_lock in zram_make_request + +Admin could reset zram during I/O operation going on so we have +used zram->init_lock as read-side lock in I/O path to prevent +sudden zram meta freeing. + +However, the init_lock is really troublesome. +We can't do call zram_meta_alloc under init_lock due to lockdep splat +because zram_rw_page is one of the function under reclaim path and +hold it as read_lock while other places in process context hold it +as write_lock. So, we have used allocation out of the lock to avoid +lockdep warn but it's not good for readability and fainally, I met +another lockdep splat between init_lock and cpu_hotplug from +kmem_cache_destroy during working zsmalloc compaction. :( + +Yes, the ideal is to remove horrible init_lock of zram in rw path. +This patch removes it in rw path and instead, add atomic refcount +for meta lifetime management and completion to free meta in process +context. It's important to free meta in process context because +some of resource destruction needs mutex lock, which could be held +if we releases the resource in reclaim context so it's deadlock, +again. + +Signed-off-by: Minchan Kim <minchan@kernel.org> +--- + drivers/block/zram/zram_drv.c | 72 +++++++++++++++++++++++++++++++------------ + drivers/block/zram/zram_drv.h | 2 ++ + 2 files changed, 54 insertions(+), 20 deletions(-) + +diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c +index aa5a4c54f057..9c69c35eace9 100644 +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -55,7 +55,7 @@ static DEVICE_ATTR_RO(name); + + static inline int init_done(struct zram *zram) + { +- return zram->meta != NULL; ++ return zram->disksize != 0; + } + + static inline struct zram *dev_to_zram(struct device *dev) +@@ -350,6 +350,8 @@ static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize) + goto out_error; + } + ++ init_completion(&meta->complete); ++ atomic_set(&meta->refcount, 1); + return meta; + + out_error: +@@ -358,6 +360,23 @@ out_error: + return NULL; + } + ++static inline bool zram_meta_get(struct zram_meta *meta) ++{ ++ if (!atomic_inc_not_zero(&meta->refcount)) ++ return false; ++ return true; ++} ++ ++/* ++ * We want to free zram_meta in process context to avoid ++ * deadlock between reclaim path and any other locks ++ */ ++static inline void zram_meta_put(struct zram_meta *meta) ++{ ++ if (atomic_dec_and_test(&meta->refcount)) ++ complete(&meta->complete); ++} ++ + static void update_position(u32 *index, int *offset, struct bio_vec *bvec) + { + if (*offset + bvec->bv_len >= PAGE_SIZE) +@@ -719,6 +738,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, + + static void zram_reset_device(struct zram *zram, bool reset_capacity) + { ++ struct zram_meta *meta; ++ u64 disksize; ++ + down_write(&zram->init_lock); + + zram->limit_pages = 0; +@@ -728,14 +750,20 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) + return; + } + ++ meta = zram->meta; ++ + zcomp_destroy(zram->comp); + zram->max_comp_streams = 1; +- zram_meta_free(zram->meta, zram->disksize); +- zram->meta = NULL; ++ disksize = zram->disksize; ++ zram_meta_put(meta); ++ /* Read/write handler will not handle further I/O operation. */ ++ zram->disksize = 0; ++ wait_for_completion(&meta->complete); ++ /* I/O operation under all of CPU are done so let's free */ ++ zram_meta_free(zram->meta, disksize); + /* Reset stats */ + memset(&zram->stats, 0, sizeof(zram->stats)); + +- zram->disksize = 0; + if (reset_capacity) + set_capacity(zram->disk, 0); + +@@ -908,23 +936,25 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) + { + struct zram *zram = queue->queuedata; + +- down_read(&zram->init_lock); +- if (unlikely(!init_done(zram))) ++ if (unlikely(!zram_meta_get(zram->meta))) + goto error; + ++ if (unlikely(!init_done(zram))) ++ goto put_meta; ++ + if (!valid_io_request(zram, bio->bi_iter.bi_sector, + bio->bi_iter.bi_size)) { + atomic64_inc(&zram->stats.invalid_io); +- goto error; ++ goto put_meta; + } + + __zram_make_request(zram, bio); +- up_read(&zram->init_lock); ++ zram_meta_put(zram->meta); + + return; +- ++put_meta: ++ zram_meta_put(zram->meta); + error: +- up_read(&zram->init_lock); + bio_io_error(bio); + } + +@@ -946,21 +976,22 @@ static void zram_slot_free_notify(struct block_device *bdev, + static int zram_rw_page(struct block_device *bdev, sector_t sector, + struct page *page, int rw) + { +- int offset, err; ++ int offset, err = -EIO; + u32 index; + struct zram *zram; + struct bio_vec bv; + + zram = bdev->bd_disk->private_data; ++ if (unlikely(!zram_meta_get(zram->meta))) ++ goto out; ++ ++ if (unlikely(!init_done(zram))) ++ goto put_meta; ++ + if (!valid_io_request(zram, sector, PAGE_SIZE)) { + atomic64_inc(&zram->stats.invalid_io); +- return -EINVAL; +- } +- +- down_read(&zram->init_lock); +- if (unlikely(!init_done(zram))) { +- err = -EIO; +- goto out_unlock; ++ err = -EINVAL; ++ goto put_meta; + } + + index = sector >> SECTORS_PER_PAGE_SHIFT; +@@ -971,8 +1002,9 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, + bv.bv_offset = 0; + + err = zram_bvec_rw(zram, &bv, index, offset, rw); +-out_unlock: +- up_read(&zram->init_lock); ++put_meta: ++ zram_meta_put(zram->meta); ++out: + /* + * If I/O fails, just return error(ie, non-zero) without + * calling page_endio. +diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h +index b05a816b09ac..07e55ff84a9c 100644 +--- a/drivers/block/zram/zram_drv.h ++++ b/drivers/block/zram/zram_drv.h +@@ -96,6 +96,8 @@ struct zram_stats { + struct zram_meta { + struct zram_table_entry *table; + struct zs_pool *mem_pool; ++ atomic_t refcount; ++ struct completion complete; /* notify IO under all of cpu are done */ + }; + + struct zram { +-- +1.9.1 + + +> +> > But I guessed most of overhead are from [de]compression, memcpy, clear_page +> > That's why I guessed we don't have measurable difference from that. +> > What's the data pattern if you use iozone? +> +> by "data pattern" you mean usage scenario? well, I usually use zram for +> `make -jX', where X=[4..N]. so N concurrent read-write ops scenario. + +What I meant is what data fills I/O buffer, which is really important +to evaluate zram because the compression/decompression speeds relys on it. + +> +> -ss +> +> > I guess it's really simple pattern compressor can do fast. I used /dev/sda +> > for dd write so more realistic data. Anyway, if we has 10% regression even if +> > the data is simple, I never want to merge it. +> > I will test it carefully and if it turns out lots regression, +> > surely, I will not go with this and send the original patch again. + +-- +Kind regards, +Minchan Kim diff --git a/a/content_digest b/N1/content_digest index 8f1dde6..33a3a03 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -221,6 +221,234 @@ "it could make significant difference, either.\n" "Hope it addresses your concern.\n" "\n" - Thanks. + "Thanks.\n" + "\n" + ">From e3f0965e692a3d085bb4ff25a774291e3f269550 Mon Sep 17 00:00:00 2001\n" + "From: Minchan Kim <minchan@kernel.org>\n" + "Date: Fri, 30 Jan 2015 09:57:37 +0900\n" + "Subject: [PATCH] zram: remove init_lock in zram_make_request\n" + "\n" + "Admin could reset zram during I/O operation going on so we have\n" + "used zram->init_lock as read-side lock in I/O path to prevent\n" + "sudden zram meta freeing.\n" + "\n" + "However, the init_lock is really troublesome.\n" + "We can't do call zram_meta_alloc under init_lock due to lockdep splat\n" + "because zram_rw_page is one of the function under reclaim path and\n" + "hold it as read_lock while other places in process context hold it\n" + "as write_lock. So, we have used allocation out of the lock to avoid\n" + "lockdep warn but it's not good for readability and fainally, I met\n" + "another lockdep splat between init_lock and cpu_hotplug from\n" + "kmem_cache_destroy during working zsmalloc compaction. :(\n" + "\n" + "Yes, the ideal is to remove horrible init_lock of zram in rw path.\n" + "This patch removes it in rw path and instead, add atomic refcount\n" + "for meta lifetime management and completion to free meta in process\n" + "context. It's important to free meta in process context because\n" + "some of resource destruction needs mutex lock, which could be held\n" + "if we releases the resource in reclaim context so it's deadlock,\n" + "again.\n" + "\n" + "Signed-off-by: Minchan Kim <minchan@kernel.org>\n" + "---\n" + " drivers/block/zram/zram_drv.c | 72 +++++++++++++++++++++++++++++++------------\n" + " drivers/block/zram/zram_drv.h | 2 ++\n" + " 2 files changed, 54 insertions(+), 20 deletions(-)\n" + "\n" + "diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c\n" + "index aa5a4c54f057..9c69c35eace9 100644\n" + "--- a/drivers/block/zram/zram_drv.c\n" + "+++ b/drivers/block/zram/zram_drv.c\n" + "@@ -55,7 +55,7 @@ static DEVICE_ATTR_RO(name);\n" + " \n" + " static inline int init_done(struct zram *zram)\n" + " {\n" + "-\treturn zram->meta != NULL;\n" + "+\treturn zram->disksize != 0;\n" + " }\n" + " \n" + " static inline struct zram *dev_to_zram(struct device *dev)\n" + "@@ -350,6 +350,8 @@ static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)\n" + " \t\tgoto out_error;\n" + " \t}\n" + " \n" + "+\tinit_completion(&meta->complete);\n" + "+\tatomic_set(&meta->refcount, 1);\n" + " \treturn meta;\n" + " \n" + " out_error:\n" + "@@ -358,6 +360,23 @@ out_error:\n" + " \treturn NULL;\n" + " }\n" + " \n" + "+static inline bool zram_meta_get(struct zram_meta *meta)\n" + "+{\n" + "+\tif (!atomic_inc_not_zero(&meta->refcount))\n" + "+\t\treturn false;\n" + "+\treturn true;\n" + "+}\n" + "+\n" + "+/*\n" + "+ * We want to free zram_meta in process context to avoid\n" + "+ * deadlock between reclaim path and any other locks\n" + "+ */\n" + "+static inline void zram_meta_put(struct zram_meta *meta)\n" + "+{\n" + "+\tif (atomic_dec_and_test(&meta->refcount))\n" + "+\t\tcomplete(&meta->complete);\n" + "+}\n" + "+\n" + " static void update_position(u32 *index, int *offset, struct bio_vec *bvec)\n" + " {\n" + " \tif (*offset + bvec->bv_len >= PAGE_SIZE)\n" + "@@ -719,6 +738,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,\n" + " \n" + " static void zram_reset_device(struct zram *zram, bool reset_capacity)\n" + " {\n" + "+\tstruct zram_meta *meta;\n" + "+\tu64 disksize;\n" + "+\n" + " \tdown_write(&zram->init_lock);\n" + " \n" + " \tzram->limit_pages = 0;\n" + "@@ -728,14 +750,20 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)\n" + " \t\treturn;\n" + " \t}\n" + " \n" + "+\tmeta = zram->meta;\n" + "+\n" + " \tzcomp_destroy(zram->comp);\n" + " \tzram->max_comp_streams = 1;\n" + "-\tzram_meta_free(zram->meta, zram->disksize);\n" + "-\tzram->meta = NULL;\n" + "+\tdisksize = zram->disksize;\n" + "+\tzram_meta_put(meta);\n" + "+\t/* Read/write handler will not handle further I/O operation. */\n" + "+\tzram->disksize = 0;\n" + "+\twait_for_completion(&meta->complete);\n" + "+\t/* I/O operation under all of CPU are done so let's free */\n" + "+\tzram_meta_free(zram->meta, disksize);\n" + " \t/* Reset stats */\n" + " \tmemset(&zram->stats, 0, sizeof(zram->stats));\n" + " \n" + "-\tzram->disksize = 0;\n" + " \tif (reset_capacity)\n" + " \t\tset_capacity(zram->disk, 0);\n" + " \n" + "@@ -908,23 +936,25 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)\n" + " {\n" + " \tstruct zram *zram = queue->queuedata;\n" + " \n" + "-\tdown_read(&zram->init_lock);\n" + "-\tif (unlikely(!init_done(zram)))\n" + "+\tif (unlikely(!zram_meta_get(zram->meta)))\n" + " \t\tgoto error;\n" + " \n" + "+\tif (unlikely(!init_done(zram)))\n" + "+\t\tgoto put_meta;\n" + "+\n" + " \tif (!valid_io_request(zram, bio->bi_iter.bi_sector,\n" + " \t\t\t\t\tbio->bi_iter.bi_size)) {\n" + " \t\tatomic64_inc(&zram->stats.invalid_io);\n" + "-\t\tgoto error;\n" + "+\t\tgoto put_meta;\n" + " \t}\n" + " \n" + " \t__zram_make_request(zram, bio);\n" + "-\tup_read(&zram->init_lock);\n" + "+\tzram_meta_put(zram->meta);\n" + " \n" + " \treturn;\n" + "-\n" + "+put_meta:\n" + "+\tzram_meta_put(zram->meta);\n" + " error:\n" + "-\tup_read(&zram->init_lock);\n" + " \tbio_io_error(bio);\n" + " }\n" + " \n" + "@@ -946,21 +976,22 @@ static void zram_slot_free_notify(struct block_device *bdev,\n" + " static int zram_rw_page(struct block_device *bdev, sector_t sector,\n" + " \t\t struct page *page, int rw)\n" + " {\n" + "-\tint offset, err;\n" + "+\tint offset, err = -EIO;\n" + " \tu32 index;\n" + " \tstruct zram *zram;\n" + " \tstruct bio_vec bv;\n" + " \n" + " \tzram = bdev->bd_disk->private_data;\n" + "+\tif (unlikely(!zram_meta_get(zram->meta)))\n" + "+\t\tgoto out;\n" + "+\n" + "+\tif (unlikely(!init_done(zram)))\n" + "+\t\tgoto put_meta;\n" + "+\n" + " \tif (!valid_io_request(zram, sector, PAGE_SIZE)) {\n" + " \t\tatomic64_inc(&zram->stats.invalid_io);\n" + "-\t\treturn -EINVAL;\n" + "-\t}\n" + "-\n" + "-\tdown_read(&zram->init_lock);\n" + "-\tif (unlikely(!init_done(zram))) {\n" + "-\t\terr = -EIO;\n" + "-\t\tgoto out_unlock;\n" + "+\t\terr = -EINVAL;\n" + "+\t\tgoto put_meta;\n" + " \t}\n" + " \n" + " \tindex = sector >> SECTORS_PER_PAGE_SHIFT;\n" + "@@ -971,8 +1002,9 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,\n" + " \tbv.bv_offset = 0;\n" + " \n" + " \terr = zram_bvec_rw(zram, &bv, index, offset, rw);\n" + "-out_unlock:\n" + "-\tup_read(&zram->init_lock);\n" + "+put_meta:\n" + "+\tzram_meta_put(zram->meta);\n" + "+out:\n" + " \t/*\n" + " \t * If I/O fails, just return error(ie, non-zero) without\n" + " \t * calling page_endio.\n" + "diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h\n" + "index b05a816b09ac..07e55ff84a9c 100644\n" + "--- a/drivers/block/zram/zram_drv.h\n" + "+++ b/drivers/block/zram/zram_drv.h\n" + "@@ -96,6 +96,8 @@ struct zram_stats {\n" + " struct zram_meta {\n" + " \tstruct zram_table_entry *table;\n" + " \tstruct zs_pool *mem_pool;\n" + "+\tatomic_t refcount;\n" + "+\tstruct completion complete; /* notify IO under all of cpu are done */\n" + " };\n" + " \n" + " struct zram {\n" + "-- \n" + "1.9.1\n" + "\n" + "\n" + "> \n" + "> > But I guessed most of overhead are from [de]compression, memcpy, clear_page\n" + "> > That's why I guessed we don't have measurable difference from that.\n" + "> > What's the data pattern if you use iozone?\n" + "> \n" + "> by \"data pattern\" you mean usage scenario? well, I usually use zram for\n" + "> `make -jX', where X=[4..N]. so N concurrent read-write ops scenario.\n" + "\n" + "What I meant is what data fills I/O buffer, which is really important\n" + "to evaluate zram because the compression/decompression speeds relys on it.\n" + "\n" + "> \n" + "> \t-ss\n" + "> \n" + "> > I guess it's really simple pattern compressor can do fast. I used /dev/sda\n" + "> > for dd write so more realistic data. Anyway, if we has 10% regression even if\n" + "> > the data is simple, I never want to merge it.\n" + "> > I will test it carefully and if it turns out lots regression,\n" + "> > surely, I will not go with this and send the original patch again.\n" + "\n" + "-- \n" + "Kind regards,\n" + Minchan Kim -bc63a2a801b9d739391a7300cceff3b79574d2e4b04e82fe1fb5a83bde8501e7 +041aa9dc9badd6d564a9efc925b4285b95bfd0b7e19d0cd1d404c9dd541b58b7
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.