* [RFC 1/7] zram: inlining zram_compress
2017-06-12 5:02 [RFC 0/7] writeback incompressible pages to storage Minchan Kim
@ 2017-06-12 5:02 ` Minchan Kim
2017-06-12 5:02 ` [RFC 2/7] zram: rename zram_decompress_page with __zram_bvec_read Minchan Kim
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Minchan Kim @ 2017-06-12 5:02 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-kernel, Juneho Choi, Sergey Senozhatsky, kernel-team,
Minchan Kim
zram_compress does several things: compression, entry allocation and
limit checking. I did it just for readability, but it hurts modularity. :(
So this patch removes the zram_compress function and inlines it in
__zram_bvec_write for upcoming patches.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
drivers/block/zram/zram_drv.c | 81 +++++++++++++++++--------------------------
1 file changed, 31 insertions(+), 50 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 5440d1a..bed534e 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -692,22 +692,45 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
return ret;
}
-static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
- struct page *page, struct zram_entry **out_entry,
- unsigned int *out_comp_len)
+static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
{
int ret;
- unsigned int comp_len;
- void *src;
+ struct zram_entry *uninitialized_var(entry);
+ unsigned int uninitialized_var(comp_len);
+ void *src, *dst, *mem;
+ struct zcomp_strm *zstrm;
+ struct page *page = bvec->bv_page;
+ u32 checksum;
+ enum zram_pageflags flags = 0;
+ unsigned long uninitialized_var(element);
unsigned long alloced_pages;
- struct zram_entry *entry = NULL;
+
+ mem = kmap_atomic(page);
+ if (page_same_filled(mem, &element)) {
+ kunmap_atomic(mem);
+ /* Free memory associated with this sector now. */
+ flags = ZRAM_SAME;
+ atomic64_inc(&zram->stats.same_pages);
+ goto out;
+ }
+ kunmap_atomic(mem);
+
+ entry = zram_dedup_find(zram, page, &checksum);
+ if (entry) {
+ comp_len = entry->len;
+ flags = ZRAM_DUP;
+ atomic64_add(comp_len, &zram->stats.dup_data_size);
+ goto out;
+ }
compress_again:
+ zstrm = zcomp_stream_get(zram->comp);
src = kmap_atomic(page);
- ret = zcomp_compress(*zstrm, src, &comp_len);
+ ret = zcomp_compress(zstrm, src, &comp_len);
kunmap_atomic(src);
if (unlikely(ret)) {
+ zcomp_stream_put(zram->comp);
pr_err("Compression failed! err=%d\n", ret);
if (entry)
zram_entry_free(zram, entry);
@@ -742,7 +765,6 @@ static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
entry = zram_entry_alloc(zram, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
- *zstrm = zcomp_stream_get(zram->comp);
if (entry)
goto compress_again;
return -ENOMEM;
@@ -752,52 +774,11 @@ static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
update_used_max(zram, alloced_pages);
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
+ zcomp_stream_put(zram->comp);
zram_entry_free(zram, entry);
return -ENOMEM;
}
- *out_entry = entry;
- *out_comp_len = comp_len;
- return 0;
-}
-
-static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
-{
- int ret;
- struct zram_entry *uninitialized_var(entry);
- unsigned int uninitialized_var(comp_len);
- void *src, *dst, *mem;
- struct zcomp_strm *zstrm;
- struct page *page = bvec->bv_page;
- u32 checksum;
- enum zram_pageflags flags = 0;
- unsigned long uninitialized_var(element);
-
- mem = kmap_atomic(page);
- if (page_same_filled(mem, &element)) {
- kunmap_atomic(mem);
- /* Free memory associated with this sector now. */
- flags = ZRAM_SAME;
- atomic64_inc(&zram->stats.same_pages);
- goto out;
- }
- kunmap_atomic(mem);
-
- entry = zram_dedup_find(zram, page, &checksum);
- if (entry) {
- comp_len = entry->len;
- flags = ZRAM_DUP;
- atomic64_add(comp_len, &zram->stats.dup_data_size);
- goto out;
- }
-
- zstrm = zcomp_stream_get(zram->comp);
- ret = zram_compress(zram, &zstrm, page, &entry, &comp_len);
- if (ret) {
- zcomp_stream_put(zram->comp);
- return ret;
- }
-
dst = zs_map_object(zram->mem_pool,
zram_entry_handle(zram, entry), ZS_MM_WO);
--
2.7.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC 2/7] zram: rename zram_decompress_page with __zram_bvec_read
2017-06-12 5:02 [RFC 0/7] writeback incompressible pages to storage Minchan Kim
2017-06-12 5:02 ` [RFC 1/7] zram: inlining zram_compress Minchan Kim
@ 2017-06-12 5:02 ` Minchan Kim
2017-06-12 5:02 ` [RFC 3/7] zram: add interface to specify backing device Minchan Kim
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Minchan Kim @ 2017-06-12 5:02 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-kernel, Juneho Choi, Sergey Senozhatsky, kernel-team,
Minchan Kim
The zram_decompress_page naming is not proper because it doesn't
decompress the page if it was a dedup hit or was stored uncompressed.
Use a more abstract term that is consistent with the write path
function __zram_bvec_write.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
drivers/block/zram/zram_drv.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index bed534e..a0c304b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -620,7 +620,7 @@ static void zram_free_page(struct zram *zram, size_t index)
zram_set_obj_size(zram, index, 0);
}
-static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
+static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index)
{
int ret;
struct zram_entry *entry;
@@ -673,7 +673,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
return -ENOMEM;
}
- ret = zram_decompress_page(zram, page, index);
+ ret = __zram_bvec_read(zram, page, index);
if (unlikely(ret))
goto out;
@@ -833,7 +833,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
if (!page)
return -ENOMEM;
- ret = zram_decompress_page(zram, page, index);
+ ret = __zram_bvec_read(zram, page, index);
if (ret)
goto out;
--
2.7.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC 3/7] zram: add interface to specify backing device
2017-06-12 5:02 [RFC 0/7] writeback incompressible pages to storage Minchan Kim
2017-06-12 5:02 ` [RFC 1/7] zram: inlining zram_compress Minchan Kim
2017-06-12 5:02 ` [RFC 2/7] zram: rename zram_decompress_page with __zram_bvec_read Minchan Kim
@ 2017-06-12 5:02 ` Minchan Kim
2017-06-12 5:02 ` [RFC 4/7] zram: add free space management in " Minchan Kim
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Minchan Kim @ 2017-06-12 5:02 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-kernel, Juneho Choi, Sergey Senozhatsky, kernel-team,
Minchan Kim
For the writeback feature, the user should set up a backing device
before zram starts working. This patch enables the interface via
/sys/block/zramX/backing_dev.
Currently, it supports only a block device, but it could be enhanced
to support a file as well.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
drivers/block/zram/zram_drv.c | 163 ++++++++++++++++++++++++++++++++++++++++++
drivers/block/zram/zram_drv.h | 5 ++
2 files changed, 168 insertions(+)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index a0c304b..dcb6f83 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -271,6 +271,163 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}
+#ifdef CONFIG_ZRAM_WRITEBACK
+static bool zram_wb_enabled(struct zram *zram)
+{
+ return zram->backing_dev;
+}
+
+static void reset_bdev(struct zram *zram)
+{
+ struct inode *inode;
+ struct address_space *mapping;
+ struct block_device *bdev;
+
+ if (!zram_wb_enabled(zram))
+ return;
+
+ mapping = zram->backing_dev->f_mapping;
+ inode = mapping->host;
+ bdev = I_BDEV(inode);
+
+ if (zram->old_block_size)
+ set_blocksize(bdev, zram->old_block_size);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ /* hope filp_close flush all of IO */
+ filp_close(zram->backing_dev, NULL);
+ zram->backing_dev = NULL;
+ zram->old_block_size = 0;
+ zram->bdev = NULL;
+}
+
+static ssize_t backing_dev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+ struct file *file = zram->backing_dev;
+ char *p;
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+ if (!zram_wb_enabled(zram)) {
+ memcpy(buf, "none\n", 5);
+ up_read(&zram->init_lock);
+ return 5;
+ }
+
+ p = file_path(file, buf, PAGE_SIZE - 1);
+ if (IS_ERR(p)) {
+ ret = PTR_ERR(p);
+ goto out;
+ }
+
+ ret = strlen(p);
+ memmove(buf, p, ret);
+ buf[ret++] = '\n';
+out:
+ up_read(&zram->init_lock);
+ return ret;
+}
+
+static ssize_t backing_dev_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ char *file_name;
+ struct filename *name = NULL;
+ struct file *backing_dev = NULL;
+ struct inode *inode;
+ struct address_space *mapping;
+ unsigned int old_block_size = 0;
+ struct block_device *bdev = NULL;
+ int err;
+ size_t sz;
+ struct zram *zram = dev_to_zram(dev);
+
+ file_name = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!file_name)
+ return -ENOMEM;
+
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ pr_info("Can't setup backing device for initialized device\n");
+ err = -EBUSY;
+ goto out;
+ }
+
+ strlcpy(file_name, buf, len);
+ /* ignore trailing newline */
+ sz = strlen(file_name);
+ if (sz > 0 && file_name[sz - 1] == '\n')
+ file_name[sz - 1] = 0x00;
+
+ name = getname_kernel(file_name);
+ if (IS_ERR(name)) {
+ err = PTR_ERR(name);
+ name = NULL;
+ goto out;
+ }
+
+ backing_dev = file_open_name(name, O_RDWR|O_LARGEFILE, 0);
+ if (IS_ERR(backing_dev)) {
+ err = PTR_ERR(backing_dev);
+ backing_dev = NULL;
+ goto out;
+ }
+
+ mapping = backing_dev->f_mapping;
+ inode = mapping->host;
+
+ /* Support only block device in this moment */
+ if (!S_ISBLK(inode->i_mode)) {
+ err = -ENOTBLK;
+ goto out;
+ }
+
+ bdev = bdgrab(I_BDEV(inode));
+ err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
+ if (err < 0)
+ goto out;
+
+ old_block_size = block_size(bdev);
+ err = set_blocksize(bdev, PAGE_SIZE);
+ if (err)
+ goto out;
+
+ reset_bdev(zram);
+
+ zram->old_block_size = old_block_size;
+ zram->bdev = bdev;
+ zram->backing_dev = backing_dev;
+ up_write(&zram->init_lock);
+
+ pr_info("setup backing device %s\n", file_name);
+
+ putname(name);
+ kfree(file_name);
+
+ return len;
+out:
+ if (bdev)
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+
+ if (backing_dev)
+ filp_close(backing_dev, NULL);
+
+ if (name)
+ putname(name);
+ up_write(&zram->init_lock);
+
+ kfree(file_name);
+
+ return err;
+}
+
+#else
+static bool zram_wb_enabled(struct zram *zram) { return false; }
+static void reset_bdev(struct zram *zram) {};
+#endif
+
+
/*
* We switched to per-cpu streams and this attr is not needed anymore.
* However, we will keep it around for some time, because:
@@ -1198,6 +1355,9 @@ static DEVICE_ATTR_RW(use_dedup);
#else
static DEVICE_ATTR_RO(use_dedup);
#endif
+#ifdef CONFIG_ZRAM_WRITEBACK
+static DEVICE_ATTR_RW(backing_dev);
+#endif
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
@@ -1209,6 +1369,9 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_max_comp_streams.attr,
&dev_attr_comp_algorithm.attr,
&dev_attr_use_dedup.attr,
+#ifdef CONFIG_ZRAM_WRITEBACK
+ &dev_attr_backing_dev.attr,
+#endif
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
&dev_attr_debug_stat.attr,
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 8ccfdcd..5193bcb 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -136,6 +136,11 @@ struct zram {
*/
bool claim; /* Protected by bdev->bd_mutex */
bool use_dedup;
+#ifdef CONFIG_ZRAM_WRITEBACK
+ struct file *backing_dev;
+ struct block_device *bdev;
+ unsigned int old_block_size;
+#endif
};
static inline bool zram_dedup_enabled(struct zram *zram)
--
2.7.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC 4/7] zram: add free space management in backing device
2017-06-12 5:02 [RFC 0/7] writeback incompressible pages to storage Minchan Kim
` (2 preceding siblings ...)
2017-06-12 5:02 ` [RFC 3/7] zram: add interface to specify backing device Minchan Kim
@ 2017-06-12 5:02 ` Minchan Kim
2017-06-12 5:02 ` [RFC 5/7] zram: identify asynchronous IO's return value Minchan Kim
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Minchan Kim @ 2017-06-12 5:02 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-kernel, Juneho Choi, Sergey Senozhatsky, kernel-team,
Minchan Kim
With a backing device, zram needs to manage the free space of
the backing device.
This patch adds bitmap logic to manage free space, which is
very naive. However, it should be simple enough considering
the frequency of incompressible pages in zram.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
drivers/block/zram/Kconfig | 13 ++++++++++++
drivers/block/zram/zram_drv.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
drivers/block/zram/zram_drv.h | 3 +++
3 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 2f3dd1f..f2ca2b5 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -27,3 +27,16 @@ config ZRAM_DEDUP
computation time trade-off. Please check the benefit before
enabling this option. Experiment shows the positive effect when
the zram is used as blockdev and is used to store build output.
+
+config ZRAM_WRITEBACK
+ bool "Write back incompressible page to backing device"
+ depends on ZRAM
+ default n
+ help
+ With incompressible page, there is no memory saving to keep it
+ in memory. Instead, write it out to backing device.
+ For this feature, admin should set up backing device via
+ /sys/block/zramX/backing_dev.
+
+ See zram.txt for more information.
+
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index dcb6f83..d82914e 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -298,6 +298,9 @@ static void reset_bdev(struct zram *zram)
zram->backing_dev = NULL;
zram->old_block_size = 0;
zram->bdev = NULL;
+
+ kvfree(zram->bitmap);
+ zram->bitmap = NULL;
}
static ssize_t backing_dev_show(struct device *dev,
@@ -337,7 +340,8 @@ static ssize_t backing_dev_store(struct device *dev,
struct file *backing_dev = NULL;
struct inode *inode;
struct address_space *mapping;
- unsigned int old_block_size = 0;
+ unsigned int bitmap_sz, old_block_size = 0;
+ unsigned long nr_pages, *bitmap = NULL;
struct block_device *bdev = NULL;
int err;
size_t sz;
@@ -388,16 +392,27 @@ static ssize_t backing_dev_store(struct device *dev,
if (err < 0)
goto out;
+ nr_pages = i_size_read(inode) >> PAGE_SHIFT;
+ bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
+ bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
+ if (!bitmap) {
+ err = -ENOMEM;
+ goto out;
+ }
+
old_block_size = block_size(bdev);
err = set_blocksize(bdev, PAGE_SIZE);
if (err)
goto out;
reset_bdev(zram);
+ spin_lock_init(&zram->bitmap_lock);
zram->old_block_size = old_block_size;
zram->bdev = bdev;
zram->backing_dev = backing_dev;
+ zram->bitmap = bitmap;
+ zram->nr_pages = nr_pages;
up_write(&zram->init_lock);
pr_info("setup backing device %s\n", file_name);
@@ -407,6 +422,9 @@ static ssize_t backing_dev_store(struct device *dev,
return len;
out:
+ if (bitmap)
+ kvfree(bitmap);
+
if (bdev)
blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
@@ -422,6 +440,34 @@ static ssize_t backing_dev_store(struct device *dev,
return err;
}
+static unsigned long get_entry_bdev(struct zram *zram)
+{
+ unsigned long entry;
+
+ spin_lock(&zram->bitmap_lock);
+ /* skip the 0 bit to avoid confusion with zram.handle == 0 */
+ entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
+ if (entry == zram->nr_pages) {
+ spin_unlock(&zram->bitmap_lock);
+ return 0;
+ }
+
+ set_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
+
+ return entry;
+}
+
+static void put_entry_bdev(struct zram *zram, unsigned long entry)
+{
+ int was_set;
+
+ spin_lock(&zram->bitmap_lock);
+ was_set = test_and_clear_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
+ WARN_ON_ONCE(!was_set);
+}
+
#else
static bool zram_wb_enabled(struct zram *zram) { return false; }
static void reset_bdev(struct zram *zram) {};
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 5193bcb..8ae3b3f 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -140,6 +140,9 @@ struct zram {
struct file *backing_dev;
struct block_device *bdev;
unsigned int old_block_size;
+ unsigned long *bitmap;
+ unsigned long nr_pages;
+ spinlock_t bitmap_lock;
#endif
};
--
2.7.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC 5/7] zram: identify asynchronous IO's return value
2017-06-12 5:02 [RFC 0/7] writeback incompressible pages to storage Minchan Kim
` (3 preceding siblings ...)
2017-06-12 5:02 ` [RFC 4/7] zram: add free space management in " Minchan Kim
@ 2017-06-12 5:02 ` Minchan Kim
2017-06-12 5:02 ` [RFC 6/7] zram: write incompressible pages to backing device Minchan Kim
2017-06-12 5:02 ` [RFC 7/7] zram: read page from " Minchan Kim
6 siblings, 0 replies; 8+ messages in thread
From: Minchan Kim @ 2017-06-12 5:02 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-kernel, Juneho Choi, Sergey Senozhatsky, kernel-team,
Minchan Kim
For upcoming asynchronous IO like writeback, zram_rw_page should
be aware of whether the requested IO was completed or submitted
successfully; otherwise, it is an error.
To that end, zram_bvec_rw has three return values:
-errno: returns an error number
0: the IO request was done synchronously
1: the IO request was issued successfully.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
drivers/block/zram/zram_drv.c | 32 ++++++++++++++++++++++++--------
1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d82914e..f5924ef 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -897,7 +897,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
{
- int ret;
+ int ret = 0;
struct zram_entry *uninitialized_var(entry);
unsigned int uninitialized_var(comp_len);
void *src, *dst, *mem;
@@ -1014,7 +1014,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
zram_slot_unlock(zram, index);
atomic64_inc(&zram->stats.pages_stored);
- return 0;
+ return ret;
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
@@ -1096,6 +1096,11 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
}
+/*
+ * Returns errno if it has some problem. Otherwise return 0 or 1.
+ * Returns 0 if IO request was done synchronously
+ * Returns 1 if IO request was successfully submitted.
+ */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset, bool is_write)
{
@@ -1117,7 +1122,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
- if (unlikely(ret)) {
+ if (unlikely(ret < 0)) {
if (!is_write)
atomic64_inc(&zram->stats.failed_reads);
else
@@ -1210,7 +1215,7 @@ static void zram_slot_free_notify(struct block_device *bdev,
static int zram_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
- int offset, err = -EIO;
+ int offset, ret;
u32 index;
struct zram *zram;
struct bio_vec bv;
@@ -1219,7 +1224,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
if (!valid_io_request(zram, sector, PAGE_SIZE)) {
atomic64_inc(&zram->stats.invalid_io);
- err = -EINVAL;
+ ret = -EINVAL;
goto out;
}
@@ -1230,7 +1235,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
- err = zram_bvec_rw(zram, &bv, index, offset, is_write);
+ ret = zram_bvec_rw(zram, &bv, index, offset, is_write);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
@@ -1240,9 +1245,20 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
* bio->bi_end_io does things to handle the error
* (e.g., SetPageError, set_page_dirty and extra works).
*/
- if (err == 0)
+ if (unlikely(ret < 0))
+ return ret;
+
+ switch (ret) {
+ case 0:
page_endio(page, is_write, 0);
- return err;
+ break;
+ case 1:
+ ret = 0;
+ break;
+ default:
+ WARN_ON(1);
+ }
+ return ret;
}
static void zram_reset_device(struct zram *zram)
--
2.7.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC 6/7] zram: write incompressible pages to backing device
2017-06-12 5:02 [RFC 0/7] writeback incompressible pages to storage Minchan Kim
` (4 preceding siblings ...)
2017-06-12 5:02 ` [RFC 5/7] zram: identify asynchronous IO's return value Minchan Kim
@ 2017-06-12 5:02 ` Minchan Kim
2017-06-12 5:02 ` [RFC 7/7] zram: read page from " Minchan Kim
6 siblings, 0 replies; 8+ messages in thread
From: Minchan Kim @ 2017-06-12 5:02 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-kernel, Juneho Choi, Sergey Senozhatsky, kernel-team,
Minchan Kim
This patch enables write IO to transfer data to the backing device.
For that, it implements the write_to_bdev function, which creates a
new bio and chains it with the parent bio to make the parent bio
asynchronous.
For rw_page, which doesn't have a parent bio, it submits its own bio
and handles IO completion via zram_page_end_io.
Also, this patch defines a new flag, ZRAM_WB, to mark a written page
for later read IO.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
drivers/block/zram/zram_drv.c | 108 ++++++++++++++++++++++++++++++++++++++----
drivers/block/zram/zram_drv.h | 1 +
2 files changed, 99 insertions(+), 10 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index f5924ef..9b0db9b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -468,9 +468,75 @@ static void put_entry_bdev(struct zram *zram, unsigned long entry)
WARN_ON_ONCE(!was_set);
}
+void zram_page_end_io(struct bio *bio)
+{
+ struct page *page = bio->bi_io_vec[0].bv_page;
+
+ page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
+ bio_put(bio);
+}
+
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+{
+ struct bio *bio;
+ unsigned long entry;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ entry = get_entry_bdev(zram);
+ if (!entry) {
+ bio_put(bio);
+ return -ENOSPC;
+ }
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio->bi_bdev = zram->bdev;
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+ bvec->bv_offset)) {
+ bio_put(bio);
+ put_entry_bdev(zram, entry);
+ return -EIO;
+ }
+
+ if (!parent) {
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_opf = parent->bi_opf;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(bio);
+ *pentry = entry;
+
+ return 0;
+}
+
+static void zram_wb_clear(struct zram *zram, u32 index)
+{
+ unsigned long entry;
+
+ zram_clear_flag(zram, index, ZRAM_WB);
+ entry = zram_get_element(zram, index);
+ zram_set_element(zram, index, 0);
+ put_entry_bdev(zram, entry);
+}
+
#else
static bool zram_wb_enabled(struct zram *zram) { return false; }
static void reset_bdev(struct zram *zram) {};
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+
+{
+ return -EIO;
+}
+static void zram_wb_clear(struct zram *zram, u32 index) {}
#endif
@@ -789,7 +855,15 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
*/
static void zram_free_page(struct zram *zram, size_t index)
{
- struct zram_entry *entry = zram_get_entry(zram, index);
+ struct zram_entry *uninitialized_var(entry);
+
+ if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
+ zram_wb_clear(zram, index);
+ atomic64_dec(&zram->stats.pages_stored);
+ return;
+ }
+
+ entry = zram_get_entry(zram, index);
/*
* No memory is allocated for same element filled pages.
@@ -895,7 +969,8 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
return ret;
}
-static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
+static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *bio)
{
int ret = 0;
struct zram_entry *uninitialized_var(entry);
@@ -907,6 +982,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
enum zram_pageflags flags = 0;
unsigned long uninitialized_var(element);
unsigned long alloced_pages;
+ bool allow_wb = true;
mem = kmap_atomic(page);
if (page_same_filled(mem, &element)) {
@@ -940,8 +1016,20 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
return ret;
}
- if (unlikely(comp_len > max_zpage_size))
+ if (unlikely(comp_len > max_zpage_size)) {
+ if (zram_wb_enabled(zram) && allow_wb) {
+ zcomp_stream_put(zram->comp);
+ ret = write_to_bdev(zram, bvec, index, bio, &element);
+ if (!ret) {
+ flags = ZRAM_WB;
+ ret = 1;
+ goto out;
+ }
+ allow_wb = false;
+ goto compress_again;
+ }
comp_len = PAGE_SIZE;
+ }
/*
* entry allocation has 2 paths:
@@ -1005,7 +1093,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
zram_free_page(zram, index);
if (flags)
zram_set_flag(zram, index, flags);
- if (flags != ZRAM_SAME) {
+ if (flags != ZRAM_SAME && flags != ZRAM_WB) {
zram_set_obj_size(zram, index, comp_len);
zram_set_entry(zram, index, entry);
} else {
@@ -1018,7 +1106,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page = NULL;
@@ -1051,7 +1139,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
vec.bv_offset = 0;
}
- ret = __zram_bvec_write(zram, &vec, index);
+ ret = __zram_bvec_write(zram, &vec, index, bio);
out:
if (is_partial_io(bvec))
__free_page(page);
@@ -1102,7 +1190,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
* Returns 1 if IO request was successfully submitted.
*/
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, bool is_write)
+ int offset, bool is_write, struct bio *bio)
{
unsigned long start_time = jiffies;
int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
@@ -1117,7 +1205,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
flush_dcache_page(bvec->bv_page);
} else {
atomic64_inc(&zram->stats.num_writes);
- ret = zram_bvec_write(zram, bvec, index, offset);
+ ret = zram_bvec_write(zram, bvec, index, offset, bio);
}
generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
@@ -1161,7 +1249,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
unwritten);
if (zram_bvec_rw(zram, &bv, index, offset,
- op_is_write(bio_op(bio))) < 0)
+ op_is_write(bio_op(bio)), bio) < 0)
goto out;
bv.bv_offset += bv.bv_len;
@@ -1235,7 +1323,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
- ret = zram_bvec_rw(zram, &bv, index, offset, is_write);
+ ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 8ae3b3f..98fb07c 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -66,6 +66,7 @@ enum zram_pageflags {
ZRAM_SAME = ZRAM_FLAG_SHIFT,
ZRAM_DUP,
ZRAM_ACCESS, /* page is now accessed */
+ ZRAM_WB, /* page is stored on backing_device */
__NR_ZRAM_PAGEFLAGS,
};
--
2.7.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC 7/7] zram: read page from backing device
2017-06-12 5:02 [RFC 0/7] writeback incompressible pages to storage Minchan Kim
` (5 preceding siblings ...)
2017-06-12 5:02 ` [RFC 6/7] zram: write incompressible pages to backing device Minchan Kim
@ 2017-06-12 5:02 ` Minchan Kim
6 siblings, 0 replies; 8+ messages in thread
From: Minchan Kim @ 2017-06-12 5:02 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-kernel, Juneho Choi, Sergey Senozhatsky, kernel-team,
Minchan Kim
This patch enables read IO from the backing device. For the feature,
it implements two IO read functions to transfer data from backing
storage.
One is an asynchronous IO function and the other is a synchronous one.
The reason I need synchronous IO is partial writes, which need to
complete the read IO before overwriting the partial data.
We could make the partial IO case asynchronous, too, but at the moment
I don't feel like adding more complexity to support such a rare use
case, so I want to keep it simple.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
drivers/block/zram/zram_drv.c | 123 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 118 insertions(+), 5 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 9b0db9b..d9eb6df 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -476,6 +476,95 @@ void zram_page_end_io(struct bio *bio)
bio_put(bio);
}
+/*
+ * Returns 0 if the submission is successful.
+ */
+static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent)
+{
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio->bi_bdev = zram->bdev;
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
+ bio_put(bio);
+ return -EIO;
+ }
+
+ if (!parent) {
+ bio->bi_opf = REQ_OP_READ;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_opf = parent->bi_opf;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(bio);
+ return 1;
+}
+
+struct zram_work {
+ struct work_struct work;
+ struct zram *zram;
+ unsigned long entry;
+ struct bio *bio;
+};
+
+#if PAGE_SIZE != 4096
+static void zram_sync_read(struct work_struct *work)
+{
+ struct bio_vec bvec;
+ struct zram_work *zw = container_of(work, struct zram_work, work);
+ struct zram *zram = zw->zram;
+ unsigned long entry = zw->entry;
+ struct bio *bio = zw->bio;
+
+ read_from_bdev_async(zram, &bvec, entry, bio);
+}
+
+/*
+ * The block layer wants one ->make_request_fn to be active at a time,
+ * so if we use chained IO with the parent IO in the same context,
+ * it's a deadlock. To avoid it, this uses a worker thread context.
+ */
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *bio)
+{
+ struct zram_work work;
+
+ work.zram = zram;
+ work.entry = entry;
+ work.bio = bio;
+
+ INIT_WORK_ONSTACK(&work.work, zram_sync_read);
+ queue_work(system_unbound_wq, &work.work);
+ flush_work(&work.work);
+ destroy_work_on_stack(&work.work);
+
+ return 1;
+}
+#else
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *bio)
+{
+ WARN_ON(1);
+ return -EIO;
+}
+#endif
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent, bool sync)
+{
+ if (sync)
+ return read_from_bdev_sync(zram, bvec, entry, parent);
+ else
+ return read_from_bdev_async(zram, bvec, entry, parent);
+}
+
static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
u32 index, struct bio *parent,
unsigned long *pentry)
@@ -536,6 +625,12 @@ static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
{
return -EIO;
}
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent, bool sync)
+{
+ return -EIO;
+}
static void zram_wb_clear(struct zram *zram, u32 index) {}
#endif
@@ -897,13 +992,31 @@ static void zram_free_page(struct zram *zram, size_t index)
zram_set_obj_size(zram, index, 0);
}
-static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index)
+static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
+ struct bio *bio, bool partial_io)
{
int ret;
struct zram_entry *entry;
unsigned int size;
void *src, *dst;
+ if (zram_wb_enabled(zram)) {
+ zram_slot_lock(zram, index);
+ if (zram_test_flag(zram, index, ZRAM_WB)) {
+ struct bio_vec bvec;
+
+ zram_slot_unlock(zram, index);
+
+ bvec.bv_page = page;
+ bvec.bv_len = PAGE_SIZE;
+ bvec.bv_offset = 0;
+ return read_from_bdev(zram, &bvec,
+ zram_get_element(zram, index),
+ bio, partial_io);
+ }
+ zram_slot_unlock(zram, index);
+ }
+
if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE))
return 0;
@@ -937,7 +1050,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index)
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page;
@@ -950,7 +1063,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
return -ENOMEM;
}
- ret = __zram_bvec_read(zram, page, index);
+ ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
if (unlikely(ret))
goto out;
@@ -1124,7 +1237,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
if (!page)
return -ENOMEM;
- ret = __zram_bvec_read(zram, page, index);
+ ret = __zram_bvec_read(zram, page, index, bio, true);
if (ret)
goto out;
@@ -1201,7 +1314,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
if (!is_write) {
atomic64_inc(&zram->stats.num_reads);
- ret = zram_bvec_read(zram, bvec, index, offset);
+ ret = zram_bvec_read(zram, bvec, index, offset, bio);
flush_dcache_page(bvec->bv_page);
} else {
atomic64_inc(&zram->stats.num_writes);
--
2.7.4
^ permalink raw reply related [flat|nested] 8+ messages in thread