All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20150130144145.GA2840@blaptop>

diff --git a/a/1.txt b/N1/1.txt
index 73db876..cb6ba17 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -199,3 +199,231 @@ it could make significant difference, either.
 Hope it addresses your concern.
 
 Thanks.
+
+>From e3f0965e692a3d085bb4ff25a774291e3f269550 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Fri, 30 Jan 2015 09:57:37 +0900
+Subject: [PATCH] zram: remove init_lock in zram_make_request
+
+Admin could reset zram during I/O operation going on so we have
+used zram->init_lock as read-side lock in I/O path to prevent
+sudden zram meta freeing.
+
+However, the init_lock is really troublesome.
+We can't do call zram_meta_alloc under init_lock due to lockdep splat
+because zram_rw_page is one of the function under reclaim path and
+hold it as read_lock while other places in process context hold it
+as write_lock. So, we have used allocation out of the lock to avoid
+lockdep warn but it's not good for readability and fainally, I met
+another lockdep splat between init_lock and cpu_hotplug from
+kmem_cache_destroy during working zsmalloc compaction. :(
+
+Yes, the ideal is to remove horrible init_lock of zram in rw path.
+This patch removes it in rw path and instead, add atomic refcount
+for meta lifetime management and completion to free meta in process
+context. It's important to free meta in process context because
+some of resource destruction needs mutex lock, which could be held
+if we releases the resource in reclaim context so it's deadlock,
+again.
+
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+---
+ drivers/block/zram/zram_drv.c | 72 +++++++++++++++++++++++++++++++------------
+ drivers/block/zram/zram_drv.h |  2 ++
+ 2 files changed, 54 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
+index aa5a4c54f057..9c69c35eace9 100644
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -55,7 +55,7 @@ static DEVICE_ATTR_RO(name);
+ 
+ static inline int init_done(struct zram *zram)
+ {
+-	return zram->meta != NULL;
++	return zram->disksize != 0;
+ }
+ 
+ static inline struct zram *dev_to_zram(struct device *dev)
+@@ -350,6 +350,8 @@ static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
+ 		goto out_error;
+ 	}
+ 
++	init_completion(&meta->complete);
++	atomic_set(&meta->refcount, 1);
+ 	return meta;
+ 
+ out_error:
+@@ -358,6 +360,23 @@ out_error:
+ 	return NULL;
+ }
+ 
++static inline bool zram_meta_get(struct zram_meta *meta)
++{
++	if (!atomic_inc_not_zero(&meta->refcount))
++		return false;
++	return true;
++}
++
++/*
++ * We want to free zram_meta in process context to avoid
++ * deadlock between reclaim path and any other locks
++ */
++static inline void zram_meta_put(struct zram_meta *meta)
++{
++	if (atomic_dec_and_test(&meta->refcount))
++		complete(&meta->complete);
++}
++
+ static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+ {
+ 	if (*offset + bvec->bv_len >= PAGE_SIZE)
+@@ -719,6 +738,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
+ 
+ static void zram_reset_device(struct zram *zram, bool reset_capacity)
+ {
++	struct zram_meta *meta;
++	u64 disksize;
++
+ 	down_write(&zram->init_lock);
+ 
+ 	zram->limit_pages = 0;
+@@ -728,14 +750,20 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
+ 		return;
+ 	}
+ 
++	meta = zram->meta;
++
+ 	zcomp_destroy(zram->comp);
+ 	zram->max_comp_streams = 1;
+-	zram_meta_free(zram->meta, zram->disksize);
+-	zram->meta = NULL;
++	disksize = zram->disksize;
++	zram_meta_put(meta);
++	/* Read/write handler will not handle further I/O operation. */
++	zram->disksize = 0;
++	wait_for_completion(&meta->complete);
++	/* I/O operation under all of CPU are done so let's free */
++	zram_meta_free(zram->meta, disksize);
+ 	/* Reset stats */
+ 	memset(&zram->stats, 0, sizeof(zram->stats));
+ 
+-	zram->disksize = 0;
+ 	if (reset_capacity)
+ 		set_capacity(zram->disk, 0);
+ 
+@@ -908,23 +936,25 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
+ {
+ 	struct zram *zram = queue->queuedata;
+ 
+-	down_read(&zram->init_lock);
+-	if (unlikely(!init_done(zram)))
++	if (unlikely(!zram_meta_get(zram->meta)))
+ 		goto error;
+ 
++	if (unlikely(!init_done(zram)))
++		goto put_meta;
++
+ 	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
+ 					bio->bi_iter.bi_size)) {
+ 		atomic64_inc(&zram->stats.invalid_io);
+-		goto error;
++		goto put_meta;
+ 	}
+ 
+ 	__zram_make_request(zram, bio);
+-	up_read(&zram->init_lock);
++	zram_meta_put(zram->meta);
+ 
+ 	return;
+-
++put_meta:
++	zram_meta_put(zram->meta);
+ error:
+-	up_read(&zram->init_lock);
+ 	bio_io_error(bio);
+ }
+ 
+@@ -946,21 +976,22 @@ static void zram_slot_free_notify(struct block_device *bdev,
+ static int zram_rw_page(struct block_device *bdev, sector_t sector,
+ 		       struct page *page, int rw)
+ {
+-	int offset, err;
++	int offset, err = -EIO;
+ 	u32 index;
+ 	struct zram *zram;
+ 	struct bio_vec bv;
+ 
+ 	zram = bdev->bd_disk->private_data;
++	if (unlikely(!zram_meta_get(zram->meta)))
++		goto out;
++
++	if (unlikely(!init_done(zram)))
++		goto put_meta;
++
+ 	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
+ 		atomic64_inc(&zram->stats.invalid_io);
+-		return -EINVAL;
+-	}
+-
+-	down_read(&zram->init_lock);
+-	if (unlikely(!init_done(zram))) {
+-		err = -EIO;
+-		goto out_unlock;
++		err = -EINVAL;
++		goto put_meta;
+ 	}
+ 
+ 	index = sector >> SECTORS_PER_PAGE_SHIFT;
+@@ -971,8 +1002,9 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
+ 	bv.bv_offset = 0;
+ 
+ 	err = zram_bvec_rw(zram, &bv, index, offset, rw);
+-out_unlock:
+-	up_read(&zram->init_lock);
++put_meta:
++	zram_meta_put(zram->meta);
++out:
+ 	/*
+ 	 * If I/O fails, just return error(ie, non-zero) without
+ 	 * calling page_endio.
+diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
+index b05a816b09ac..07e55ff84a9c 100644
+--- a/drivers/block/zram/zram_drv.h
++++ b/drivers/block/zram/zram_drv.h
+@@ -96,6 +96,8 @@ struct zram_stats {
+ struct zram_meta {
+ 	struct zram_table_entry *table;
+ 	struct zs_pool *mem_pool;
++	atomic_t refcount;
++	struct completion complete; /* notify IO under all of cpu are done */
+ };
+ 
+ struct zram {
+-- 
+1.9.1
+
+
+> 
+> > But I guessed most of overhead are from [de]compression, memcpy, clear_page
+> > That's why I guessed we don't have measurable difference from that.
+> > What's the data pattern if you use iozone?
+> 
+> by "data pattern" you mean usage scenario? well, I usually use zram for
+> `make -jX', where X=[4..N]. so N concurrent read-write ops scenario.
+
+What I meant is what data fills I/O buffer, which is really important
+to evaluate zram because the compression/decompression speeds relys on it.
+
+> 
+> 	-ss
+> 
+> > I guess it's really simple pattern compressor can do fast. I used /dev/sda
+> > for dd write so more realistic data. Anyway, if we has 10% regression even if
+> > the data is simple, I never want to merge it.
+> > I will test it carefully and if it turns out lots regression,
+> > surely, I will not go with this and send the original patch again.
+
+-- 
+Kind regards,
+Minchan Kim
diff --git a/a/content_digest b/N1/content_digest
index 8f1dde6..33a3a03 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -221,6 +221,234 @@
  "it could make significant difference, either.\n"
  "Hope it addresses your concern.\n"
  "\n"
- Thanks.
+ "Thanks.\n"
+ "\n"
+ ">From e3f0965e692a3d085bb4ff25a774291e3f269550 Mon Sep 17 00:00:00 2001\n"
+ "From: Minchan Kim <minchan@kernel.org>\n"
+ "Date: Fri, 30 Jan 2015 09:57:37 +0900\n"
+ "Subject: [PATCH] zram: remove init_lock in zram_make_request\n"
+ "\n"
+ "Admin could reset zram during I/O operation going on so we have\n"
+ "used zram->init_lock as read-side lock in I/O path to prevent\n"
+ "sudden zram meta freeing.\n"
+ "\n"
+ "However, the init_lock is really troublesome.\n"
+ "We can't do call zram_meta_alloc under init_lock due to lockdep splat\n"
+ "because zram_rw_page is one of the function under reclaim path and\n"
+ "hold it as read_lock while other places in process context hold it\n"
+ "as write_lock. So, we have used allocation out of the lock to avoid\n"
+ "lockdep warn but it's not good for readability and fainally, I met\n"
+ "another lockdep splat between init_lock and cpu_hotplug from\n"
+ "kmem_cache_destroy during working zsmalloc compaction. :(\n"
+ "\n"
+ "Yes, the ideal is to remove horrible init_lock of zram in rw path.\n"
+ "This patch removes it in rw path and instead, add atomic refcount\n"
+ "for meta lifetime management and completion to free meta in process\n"
+ "context. It's important to free meta in process context because\n"
+ "some of resource destruction needs mutex lock, which could be held\n"
+ "if we releases the resource in reclaim context so it's deadlock,\n"
+ "again.\n"
+ "\n"
+ "Signed-off-by: Minchan Kim <minchan@kernel.org>\n"
+ "---\n"
+ " drivers/block/zram/zram_drv.c | 72 +++++++++++++++++++++++++++++++------------\n"
+ " drivers/block/zram/zram_drv.h |  2 ++\n"
+ " 2 files changed, 54 insertions(+), 20 deletions(-)\n"
+ "\n"
+ "diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c\n"
+ "index aa5a4c54f057..9c69c35eace9 100644\n"
+ "--- a/drivers/block/zram/zram_drv.c\n"
+ "+++ b/drivers/block/zram/zram_drv.c\n"
+ "@@ -55,7 +55,7 @@ static DEVICE_ATTR_RO(name);\n"
+ " \n"
+ " static inline int init_done(struct zram *zram)\n"
+ " {\n"
+ "-\treturn zram->meta != NULL;\n"
+ "+\treturn zram->disksize != 0;\n"
+ " }\n"
+ " \n"
+ " static inline struct zram *dev_to_zram(struct device *dev)\n"
+ "@@ -350,6 +350,8 @@ static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)\n"
+ " \t\tgoto out_error;\n"
+ " \t}\n"
+ " \n"
+ "+\tinit_completion(&meta->complete);\n"
+ "+\tatomic_set(&meta->refcount, 1);\n"
+ " \treturn meta;\n"
+ " \n"
+ " out_error:\n"
+ "@@ -358,6 +360,23 @@ out_error:\n"
+ " \treturn NULL;\n"
+ " }\n"
+ " \n"
+ "+static inline bool zram_meta_get(struct zram_meta *meta)\n"
+ "+{\n"
+ "+\tif (!atomic_inc_not_zero(&meta->refcount))\n"
+ "+\t\treturn false;\n"
+ "+\treturn true;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * We want to free zram_meta in process context to avoid\n"
+ "+ * deadlock between reclaim path and any other locks\n"
+ "+ */\n"
+ "+static inline void zram_meta_put(struct zram_meta *meta)\n"
+ "+{\n"
+ "+\tif (atomic_dec_and_test(&meta->refcount))\n"
+ "+\t\tcomplete(&meta->complete);\n"
+ "+}\n"
+ "+\n"
+ " static void update_position(u32 *index, int *offset, struct bio_vec *bvec)\n"
+ " {\n"
+ " \tif (*offset + bvec->bv_len >= PAGE_SIZE)\n"
+ "@@ -719,6 +738,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,\n"
+ " \n"
+ " static void zram_reset_device(struct zram *zram, bool reset_capacity)\n"
+ " {\n"
+ "+\tstruct zram_meta *meta;\n"
+ "+\tu64 disksize;\n"
+ "+\n"
+ " \tdown_write(&zram->init_lock);\n"
+ " \n"
+ " \tzram->limit_pages = 0;\n"
+ "@@ -728,14 +750,20 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)\n"
+ " \t\treturn;\n"
+ " \t}\n"
+ " \n"
+ "+\tmeta = zram->meta;\n"
+ "+\n"
+ " \tzcomp_destroy(zram->comp);\n"
+ " \tzram->max_comp_streams = 1;\n"
+ "-\tzram_meta_free(zram->meta, zram->disksize);\n"
+ "-\tzram->meta = NULL;\n"
+ "+\tdisksize = zram->disksize;\n"
+ "+\tzram_meta_put(meta);\n"
+ "+\t/* Read/write handler will not handle further I/O operation. */\n"
+ "+\tzram->disksize = 0;\n"
+ "+\twait_for_completion(&meta->complete);\n"
+ "+\t/* I/O operation under all of CPU are done so let's free */\n"
+ "+\tzram_meta_free(zram->meta, disksize);\n"
+ " \t/* Reset stats */\n"
+ " \tmemset(&zram->stats, 0, sizeof(zram->stats));\n"
+ " \n"
+ "-\tzram->disksize = 0;\n"
+ " \tif (reset_capacity)\n"
+ " \t\tset_capacity(zram->disk, 0);\n"
+ " \n"
+ "@@ -908,23 +936,25 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)\n"
+ " {\n"
+ " \tstruct zram *zram = queue->queuedata;\n"
+ " \n"
+ "-\tdown_read(&zram->init_lock);\n"
+ "-\tif (unlikely(!init_done(zram)))\n"
+ "+\tif (unlikely(!zram_meta_get(zram->meta)))\n"
+ " \t\tgoto error;\n"
+ " \n"
+ "+\tif (unlikely(!init_done(zram)))\n"
+ "+\t\tgoto put_meta;\n"
+ "+\n"
+ " \tif (!valid_io_request(zram, bio->bi_iter.bi_sector,\n"
+ " \t\t\t\t\tbio->bi_iter.bi_size)) {\n"
+ " \t\tatomic64_inc(&zram->stats.invalid_io);\n"
+ "-\t\tgoto error;\n"
+ "+\t\tgoto put_meta;\n"
+ " \t}\n"
+ " \n"
+ " \t__zram_make_request(zram, bio);\n"
+ "-\tup_read(&zram->init_lock);\n"
+ "+\tzram_meta_put(zram->meta);\n"
+ " \n"
+ " \treturn;\n"
+ "-\n"
+ "+put_meta:\n"
+ "+\tzram_meta_put(zram->meta);\n"
+ " error:\n"
+ "-\tup_read(&zram->init_lock);\n"
+ " \tbio_io_error(bio);\n"
+ " }\n"
+ " \n"
+ "@@ -946,21 +976,22 @@ static void zram_slot_free_notify(struct block_device *bdev,\n"
+ " static int zram_rw_page(struct block_device *bdev, sector_t sector,\n"
+ " \t\t       struct page *page, int rw)\n"
+ " {\n"
+ "-\tint offset, err;\n"
+ "+\tint offset, err = -EIO;\n"
+ " \tu32 index;\n"
+ " \tstruct zram *zram;\n"
+ " \tstruct bio_vec bv;\n"
+ " \n"
+ " \tzram = bdev->bd_disk->private_data;\n"
+ "+\tif (unlikely(!zram_meta_get(zram->meta)))\n"
+ "+\t\tgoto out;\n"
+ "+\n"
+ "+\tif (unlikely(!init_done(zram)))\n"
+ "+\t\tgoto put_meta;\n"
+ "+\n"
+ " \tif (!valid_io_request(zram, sector, PAGE_SIZE)) {\n"
+ " \t\tatomic64_inc(&zram->stats.invalid_io);\n"
+ "-\t\treturn -EINVAL;\n"
+ "-\t}\n"
+ "-\n"
+ "-\tdown_read(&zram->init_lock);\n"
+ "-\tif (unlikely(!init_done(zram))) {\n"
+ "-\t\terr = -EIO;\n"
+ "-\t\tgoto out_unlock;\n"
+ "+\t\terr = -EINVAL;\n"
+ "+\t\tgoto put_meta;\n"
+ " \t}\n"
+ " \n"
+ " \tindex = sector >> SECTORS_PER_PAGE_SHIFT;\n"
+ "@@ -971,8 +1002,9 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,\n"
+ " \tbv.bv_offset = 0;\n"
+ " \n"
+ " \terr = zram_bvec_rw(zram, &bv, index, offset, rw);\n"
+ "-out_unlock:\n"
+ "-\tup_read(&zram->init_lock);\n"
+ "+put_meta:\n"
+ "+\tzram_meta_put(zram->meta);\n"
+ "+out:\n"
+ " \t/*\n"
+ " \t * If I/O fails, just return error(ie, non-zero) without\n"
+ " \t * calling page_endio.\n"
+ "diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h\n"
+ "index b05a816b09ac..07e55ff84a9c 100644\n"
+ "--- a/drivers/block/zram/zram_drv.h\n"
+ "+++ b/drivers/block/zram/zram_drv.h\n"
+ "@@ -96,6 +96,8 @@ struct zram_stats {\n"
+ " struct zram_meta {\n"
+ " \tstruct zram_table_entry *table;\n"
+ " \tstruct zs_pool *mem_pool;\n"
+ "+\tatomic_t refcount;\n"
+ "+\tstruct completion complete; /* notify IO under all of cpu are done */\n"
+ " };\n"
+ " \n"
+ " struct zram {\n"
+ "-- \n"
+ "1.9.1\n"
+ "\n"
+ "\n"
+ "> \n"
+ "> > But I guessed most of overhead are from [de]compression, memcpy, clear_page\n"
+ "> > That's why I guessed we don't have measurable difference from that.\n"
+ "> > What's the data pattern if you use iozone?\n"
+ "> \n"
+ "> by \"data pattern\" you mean usage scenario? well, I usually use zram for\n"
+ "> `make -jX', where X=[4..N]. so N concurrent read-write ops scenario.\n"
+ "\n"
+ "What I meant is what data fills I/O buffer, which is really important\n"
+ "to evaluate zram because the compression/decompression speeds relys on it.\n"
+ "\n"
+ "> \n"
+ "> \t-ss\n"
+ "> \n"
+ "> > I guess it's really simple pattern compressor can do fast. I used /dev/sda\n"
+ "> > for dd write so more realistic data. Anyway, if we has 10% regression even if\n"
+ "> > the data is simple, I never want to merge it.\n"
+ "> > I will test it carefully and if it turns out lots regression,\n"
+ "> > surely, I will not go with this and send the original patch again.\n"
+ "\n"
+ "-- \n"
+ "Kind regards,\n"
+ Minchan Kim
 
-bc63a2a801b9d739391a7300cceff3b79574d2e4b04e82fe1fb5a83bde8501e7
+041aa9dc9badd6d564a9efc925b4285b95bfd0b7e19d0cd1d404c9dd541b58b7

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.