All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] dm-writecache
@ 2017-11-07 20:59 Mikulas Patocka
  2017-11-07 21:00 ` [PATCH 1/4] bio: have bio_kmap_irq return the size of mapped data Mikulas Patocka
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Mikulas Patocka @ 2017-11-07 20:59 UTC (permalink / raw)
  To: Mike Snitzer, Jonathan Brassow, Alasdair G. Kergon; +Cc: dm-devel

Hi

Here I'm sending patches for the dm-writecache target. It is for kernel 
4.14-rc8.

Mikulas

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/4] bio: have bio_kmap_irq return the size of mapped data
  2017-11-07 20:59 [PATCH 0/4] dm-writecache Mikulas Patocka
@ 2017-11-07 21:00 ` Mikulas Patocka
  2017-11-07 21:04 ` [PATCH 2/4] vmalloc: introduce vmap_pfn Mikulas Patocka
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Mikulas Patocka @ 2017-11-07 21:00 UTC (permalink / raw)
  To: Mike Snitzer, Jonathan Brassow, Alasdair G. Kergon; +Cc: dm-devel

The function bio_kmap_irq is not usable because it does not return the
size of the mapped data.

Fix bio_kmap_irq and __bio_kmap_irq so that they return the size of
mapped data.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 include/linux/bio.h |   10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

Index: linux-2.6/include/linux/bio.h
===================================================================
--- linux-2.6.orig/include/linux/bio.h
+++ linux-2.6/include/linux/bio.h
@@ -576,14 +576,16 @@ static inline void bvec_kunmap_irq(char
 #endif
 
 static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter,
-				   unsigned long *flags)
+				   unsigned long *flags, unsigned *size)
 {
-	return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags);
+	struct bio_vec bv = bio_iter_iovec(bio, iter);
+	*size = bv.bv_len;
+	return bvec_kmap_irq(&bv, flags);
 }
 #define __bio_kunmap_irq(buf, flags)	bvec_kunmap_irq(buf, flags)
 
-#define bio_kmap_irq(bio, flags) \
-	__bio_kmap_irq((bio), (bio)->bi_iter, (flags))
+#define bio_kmap_irq(bio, flags, size) \
+	__bio_kmap_irq((bio), (bio)->bi_iter, (flags), (size))
 #define bio_kunmap_irq(buf,flags)	__bio_kunmap_irq(buf, flags)
 
 /*

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/4] vmalloc: introduce vmap_pfn
  2017-11-07 20:59 [PATCH 0/4] dm-writecache Mikulas Patocka
  2017-11-07 21:00 ` [PATCH 1/4] bio: have bio_kmap_irq return the size of mapped data Mikulas Patocka
@ 2017-11-07 21:04 ` Mikulas Patocka
  2017-11-07 21:04 ` [PATCH 3/4] swait: export the symbols __prepare_to_swait and __finish_swait Mikulas Patocka
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Mikulas Patocka @ 2017-11-07 21:04 UTC (permalink / raw)
  To: Mike Snitzer, Jonathan Brassow, Alasdair G. Kergon; +Cc: dm-devel

There's a function vmap that can take discontiguous pages and map them 
linearly to the vmalloc space. However, persistent memory may not be 
backed by pages, so we can't use vmap on it.

This patch introduces a function vmap_pfn that works like vmap, but it 
takes an array of page frame numbers (pfn_t). It can be used to remap 
discontiguous chunks of persistent memory into a linear range.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 include/linux/vmalloc.h |    2 +
 mm/vmalloc.c            |   88 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 71 insertions(+), 19 deletions(-)

Index: linux-2.6/include/linux/vmalloc.h
===================================================================
--- linux-2.6.orig/include/linux/vmalloc.h
+++ linux-2.6/include/linux/vmalloc.h
@@ -98,6 +98,8 @@ extern void vfree_atomic(const void *add
 
 extern void *vmap(struct page **pages, unsigned int count,
 			unsigned long flags, pgprot_t prot);
+extern void *vmap_pfn(pfn_t *pfns, unsigned int count,
+			unsigned long flags, pgprot_t prot);
 extern void vunmap(const void *addr);
 
 extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c
+++ linux-2.6/mm/vmalloc.c
@@ -31,6 +31,7 @@
 #include <linux/compiler.h>
 #include <linux/llist.h>
 #include <linux/bitops.h>
+#include <linux/pfn_t.h>
 
 #include <linux/uaccess.h>
 #include <asm/tlbflush.h>
@@ -132,7 +133,7 @@ static void vunmap_page_range(unsigned l
 }
 
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, pfn_t *pfns, int *nr)
 {
 	pte_t *pte;
 
@@ -145,20 +146,25 @@ static int vmap_pte_range(pmd_t *pmd, un
 	if (!pte)
 		return -ENOMEM;
 	do {
-		struct page *page = pages[*nr];
-
+		unsigned long pf;
+		if (pages) {
+			struct page *page = pages[*nr];
+			if (WARN_ON(!page))
+				return -ENOMEM;
+			pf = page_to_pfn(page);
+		} else {
+			pf = pfn_t_to_pfn(pfns[*nr]);
+		}
 		if (WARN_ON(!pte_none(*pte)))
 			return -EBUSY;
-		if (WARN_ON(!page))
-			return -ENOMEM;
-		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+		set_pte_at(&init_mm, addr, pte, pfn_pte(pf, prot));
 		(*nr)++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	return 0;
 }
 
 static int vmap_pmd_range(pud_t *pud, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, pfn_t *pfns, int *nr)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -168,14 +174,14 @@ static int vmap_pmd_range(pud_t *pud, un
 		return -ENOMEM;
 	do {
 		next = pmd_addr_end(addr, end);
-		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
+		if (vmap_pte_range(pmd, addr, next, prot, pages, pfns, nr))
 			return -ENOMEM;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
 static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, pfn_t *pfns, int *nr)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -185,14 +191,14 @@ static int vmap_pud_range(p4d_t *p4d, un
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
+		if (vmap_pmd_range(pud, addr, next, prot, pages, pfns, nr))
 			return -ENOMEM;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
 static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, pfn_t *pfns, int *nr)
 {
 	p4d_t *p4d;
 	unsigned long next;
@@ -202,7 +208,7 @@ static int vmap_p4d_range(pgd_t *pgd, un
 		return -ENOMEM;
 	do {
 		next = p4d_addr_end(addr, end);
-		if (vmap_pud_range(p4d, addr, next, prot, pages, nr))
+		if (vmap_pud_range(p4d, addr, next, prot, pages, pfns, nr))
 			return -ENOMEM;
 	} while (p4d++, addr = next, addr != end);
 	return 0;
@@ -215,7 +221,7 @@ static int vmap_p4d_range(pgd_t *pgd, un
  * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
  */
 static int vmap_page_range_noflush(unsigned long start, unsigned long end,
-				   pgprot_t prot, struct page **pages)
+				   pgprot_t prot, struct page **pages, pfn_t *pfns)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -227,7 +233,7 @@ static int vmap_page_range_noflush(unsig
 	pgd = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
+		err = vmap_p4d_range(pgd, addr, next, prot, pages, pfns, &nr);
 		if (err)
 			return err;
 	} while (pgd++, addr = next, addr != end);
@@ -236,11 +242,11 @@ static int vmap_page_range_noflush(unsig
 }
 
 static int vmap_page_range(unsigned long start, unsigned long end,
-			   pgprot_t prot, struct page **pages)
+			   pgprot_t prot, struct page **pages, pfn_t *pfns)
 {
 	int ret;
 
-	ret = vmap_page_range_noflush(start, end, prot, pages);
+	ret = vmap_page_range_noflush(start, end, prot, pages, pfns);
 	flush_cache_vmap(start, end);
 	return ret;
 }
@@ -1191,7 +1197,7 @@ void *vm_map_ram(struct page **pages, un
 		addr = va->va_start;
 		mem = (void *)addr;
 	}
-	if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
+	if (vmap_page_range(addr, addr + size, prot, pages, NULL) < 0) {
 		vm_unmap_ram(mem, count);
 		return NULL;
 	}
@@ -1306,7 +1312,7 @@ void __init vmalloc_init(void)
 int map_kernel_range_noflush(unsigned long addr, unsigned long size,
 			     pgprot_t prot, struct page **pages)
 {
-	return vmap_page_range_noflush(addr, addr + size, prot, pages);
+	return vmap_page_range_noflush(addr, addr + size, prot, pages, NULL);
 }
 
 /**
@@ -1347,13 +1353,24 @@ void unmap_kernel_range(unsigned long ad
 }
 EXPORT_SYMBOL_GPL(unmap_kernel_range);
 
+static int map_vm_area_pfn(struct vm_struct *area, pgprot_t prot, pfn_t *pfns)
+{
+	unsigned long addr = (unsigned long)area->addr;
+	unsigned long end = addr + get_vm_area_size(area);
+	int err;
+
+	err = vmap_page_range(addr, end, prot, NULL, pfns);
+
+	return err > 0 ? 0 : err;
+}
+
 int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
 {
 	unsigned long addr = (unsigned long)area->addr;
 	unsigned long end = addr + get_vm_area_size(area);
 	int err;
 
-	err = vmap_page_range(addr, end, prot, pages);
+	err = vmap_page_range(addr, end, prot, pages, NULL);
 
 	return err > 0 ? 0 : err;
 }
@@ -1660,6 +1677,39 @@ void *vmap(struct page **pages, unsigned
 }
 EXPORT_SYMBOL(vmap);
 
+/**
+ *	vmap_pfn  -  map an array of pages into virtually contiguous space
+ *	@pfns:		array of page frame numbers
+ *	@count:		number of pages to map
+ *	@flags:		vm_area->flags
+ *	@prot:		page protection for the mapping
+ *
+ *	Maps @count pages from @pages into contiguous kernel virtual
+ *	space.
+ */
+void *vmap_pfn(pfn_t *pfns, unsigned int count, unsigned long flags, pgprot_t prot)
+{
+	struct vm_struct *area;
+	unsigned long size;		/* In bytes */
+
+	might_sleep();
+
+	size = (unsigned long)count << PAGE_SHIFT;
+	if (unlikely((size >> PAGE_SHIFT) != count))
+		return NULL;
+	area = get_vm_area_caller(size, flags, __builtin_return_address(0));
+	if (!area)
+		return NULL;
+
+	if (map_vm_area_pfn(area, prot, pfns)) {
+		vunmap(area->addr);
+		return NULL;
+	}
+
+	return area->addr;
+}
+EXPORT_SYMBOL(vmap_pfn);
+
 static void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, pgprot_t prot,
 			    int node, const void *caller);

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 3/4] swait: export the symbols __prepare_to_swait and __finish_swait
  2017-11-07 20:59 [PATCH 0/4] dm-writecache Mikulas Patocka
  2017-11-07 21:00 ` [PATCH 1/4] bio: have bio_kmap_irq return the size of mapped data Mikulas Patocka
  2017-11-07 21:04 ` [PATCH 2/4] vmalloc: introduce vmap_pfn Mikulas Patocka
@ 2017-11-07 21:04 ` Mikulas Patocka
  2017-11-07 21:05 ` [PATCH 4/4] dm-writecache Mikulas Patocka
  2017-11-07 21:20 ` [PATCH 0/4] dm-writecache Mike Snitzer
  4 siblings, 0 replies; 6+ messages in thread
From: Mikulas Patocka @ 2017-11-07 21:04 UTC (permalink / raw)
  To: Mike Snitzer, Jonathan Brassow, Alasdair G. Kergon; +Cc: dm-devel

In order to reduce locking overhead, I use the spinlock in
swait_queue_head to protect not only the wait queue, but also the list of
events. Consequently, I need to use unlocked functions __prepare_to_swait
and __finish_swait. These functions are declared in the file
include/linux/swait.h, but they are not exported, and so they are not
useable from kernel modules.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 kernel/sched/swait.c |    2 ++
 1 file changed, 2 insertions(+)

Index: linux-2.6/kernel/sched/swait.c
===================================================================
--- linux-2.6.orig/kernel/sched/swait.c
+++ linux-2.6/kernel/sched/swait.c
@@ -73,6 +73,7 @@ void __prepare_to_swait(struct swait_que
 	if (list_empty(&wait->task_list))
 		list_add(&wait->task_list, &q->task_list);
 }
+EXPORT_SYMBOL(__prepare_to_swait);
 
 void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
 {
@@ -102,6 +103,7 @@ void __finish_swait(struct swait_queue_h
 	if (!list_empty(&wait->task_list))
 		list_del_init(&wait->task_list);
 }
+EXPORT_SYMBOL(__finish_swait);
 
 void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
 {

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 4/4] dm-writecache
  2017-11-07 20:59 [PATCH 0/4] dm-writecache Mikulas Patocka
                   ` (2 preceding siblings ...)
  2017-11-07 21:04 ` [PATCH 3/4] swait: export the symbols __prepare_to_swait and __finish_swait Mikulas Patocka
@ 2017-11-07 21:05 ` Mikulas Patocka
  2017-11-07 21:20 ` [PATCH 0/4] dm-writecache Mike Snitzer
  4 siblings, 0 replies; 6+ messages in thread
From: Mikulas Patocka @ 2017-11-07 21:05 UTC (permalink / raw)
  To: Mike Snitzer, Jonathan Brassow, Alasdair G. Kergon; +Cc: dm-devel

The dm-writecache target.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/Kconfig         |   11 
 drivers/md/Makefile        |    1 
 drivers/md/dm-writecache.c | 2356 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 2368 insertions(+)

Index: linux-2.6/drivers/md/Kconfig
===================================================================
--- linux-2.6.orig/drivers/md/Kconfig
+++ linux-2.6/drivers/md/Kconfig
@@ -326,6 +326,17 @@ config DM_CACHE_SMQ
          of less memory utilization, improved performance and increased
          adaptability in the face of changing workloads.
 
+config DM_WRITECACHE
+	tristate "Writecache target"
+	depends on BLK_DEV_DM
+	---help---
+	   The writecache target caches writes on persistent memory or SSD.
+	   It is intended for databases or other programs that need extremely
+	   low commit latency.
+
+	   The writecache target doesn't cache reads because reads are supposed
+	   to be cached in standard RAM.
+
 config DM_ERA
        tristate "Era target (EXPERIMENTAL)"
        depends on BLK_DEV_DM
Index: linux-2.6/drivers/md/Makefile
===================================================================
--- linux-2.6.orig/drivers/md/Makefile
+++ linux-2.6/drivers/md/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_DM_ERA)		+= dm-era.o
 obj-$(CONFIG_DM_LOG_WRITES)	+= dm-log-writes.o
 obj-$(CONFIG_DM_INTEGRITY)	+= dm-integrity.o
 obj-$(CONFIG_DM_ZONED)		+= dm-zoned.o
+obj-$(CONFIG_DM_WRITECACHE)	+= dm-writecache.o
 
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs			+= dm-uevent.o
Index: linux-2.6/drivers/md/dm-writecache.c
===================================================================
--- /dev/null
+++ linux-2.6/drivers/md/dm-writecache.c
@@ -0,0 +1,2356 @@
+#include <linux/device-mapper.h>
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/kthread.h>
+#include <linux/swait.h>
+#include <linux/delay.h>
+#include <linux/dm-io.h>
+#include <linux/dm-kcopyd.h>
+#include <linux/dax.h>
+#include <linux/pfn_t.h>
+
+#define DM_MSG_PREFIX	"writecache"
+
+#define HIGH_WATERMARK			50
+#define LOW_WATERMARK			45
+#define MAX_WRITEBACK_JOBS		1024
+#define ENDIO_LATENCY			16
+#define WRITEBACK_LATENCY		64
+#define AUTOCOMMIT_BLOCKS_SSD		65536
+#define AUTOCOMMIT_BLOCKS_PMEM		64
+#define AUTOCOMMIT_MSEC			1000
+
+/*
+ * If the architecture doesn't support persistent memory, we can use this driver
+ * in SSD-only mode.
+ */
+#ifndef CONFIG_ARCH_HAS_PMEM_API
+#define DM_WRITECACHE_ONLY_SSD
+#endif
+
+//#define WC_MEASURE_LATENCY
+
+#define BITMAP_GRANULARITY	65536
+#if BITMAP_GRANULARITY < PAGE_SIZE
+#undef BITMAP_GRANULARITY
+#define BITMAP_GRANULARITY	PAGE_SIZE
+#endif
+
+/*
+ * The clwb instruction is very slow on Broadwell, so we use non-temporal
+ * stores instead.
+ */
+#ifdef CONFIG_X86_64
+#define NT_STORE(dest, src)	asm ("movnti %1, %0" : "=m"(dest) : "r"(src))
+#define FLUSH_RANGE(dax, ptr, size)  do { } while (0)
+#define COMMIT_FLUSHED()	wmb()
+#else
+#define NT_STORE(dest, src)	ACCESS_ONCE(dest) = (src)
+#define FLUSH_RANGE		dax_flush
+#define COMMIT_FLUSHED()	do { } while (0)
+#endif
+
+#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && !defined(DM_WRITECACHE_ONLY_SSD)
+#define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+#endif
+
+struct wc_pmem_holder {
+	struct dm_dev *dev;
+	bool vmapped;
+	bool page_backed;
+};
+
+static int persistent_memory_claim(struct dm_target *ti, const char *name,
+				   struct wc_pmem_holder *pmem_holder, void **addr, uint64_t *size)
+{
+	int r;
+	loff_t s;
+	long p, da;
+	pfn_t pfn;
+	int id;
+
+	pmem_holder->vmapped = false;
+	pmem_holder->page_backed = true;
+
+	r = dm_get_device(ti, name, FMODE_READ | FMODE_WRITE, &pmem_holder->dev);
+	if (r)
+		goto err0;
+
+	if (!pmem_holder->dev->dax_dev) {
+		r = -EOPNOTSUPP;
+		goto err1;
+	}
+	s = i_size_read(pmem_holder->dev->bdev->bd_inode);
+	p = s >> PAGE_SHIFT;
+	if (!p) {
+		r = -EINVAL;
+		goto err1;
+	}
+	if (p != s >> PAGE_SHIFT) {
+		r = -EOVERFLOW;
+		goto err1;
+	}
+
+	id = dax_read_lock();
+
+	da = dax_direct_access(pmem_holder->dev->dax_dev, 0, p, addr, &pfn);
+	if (da < 0) {
+		r = da;
+		goto err2;
+	}
+#ifdef WC_MEASURE_LATENCY
+	printk(KERN_DEBUG "dm-writecache: device %s, pfn %016llx\n", pmem_holder->dev->name, pfn.val);
+#endif
+	if (da == p) {
+		pmem_holder->page_backed &= pfn_t_has_page(pfn);
+	} else {
+		long i;
+		pfn_t *pfns;
+		pfns = kvmalloc(p * sizeof(pfn_t), GFP_KERNEL);
+		if (!pfns) {
+			r = -ENOMEM;
+			goto err2;
+		}
+		i = 0;
+		do {
+			long daa;
+			daa = dax_direct_access(pmem_holder->dev->dax_dev, i, p - i, addr, &pfn);
+			if (daa <= 0) {
+				r = daa ? daa : -EINVAL;
+				kvfree(pfns);
+				goto err2;
+			}
+			pmem_holder->page_backed &= pfn_t_has_page(pfn);
+			while (daa-- && i < p) {
+				pfns[i++] = pfn;
+				pfn.val++;
+			}
+		} while (i < p);
+		*addr = vmap_pfn(pfns, p, VM_MAP, PAGE_KERNEL);
+		kvfree(pfns);
+		if (!*addr) {
+			r = -ENOMEM;
+			goto err2;
+		}
+		pmem_holder->vmapped = true;
+	}
+	*size = s;
+
+	dax_read_unlock(id);
+#ifdef WC_MEASURE_LATENCY
+	printk(KERN_DEBUG "dm-writecache: device %s, pages %d\n", pmem_holder->dev->name, pmem_holder->page_backed);
+#endif
+
+	return 0;
+
+err2:
+	dax_read_unlock(id);
+err1:
+	dm_put_device(ti, pmem_holder->dev);
+err0:
+	return r;
+}
+
+static void persistent_memory_release(struct dm_target *ti, struct wc_pmem_holder *pmem_holder,
+				      void *addr, size_t size)
+{
+	if (pmem_holder->vmapped)
+		vunmap(addr);
+	dm_put_device(ti, pmem_holder->dev);
+}
+
+static struct page *persistent_memory_page(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		return vmalloc_to_page(addr);
+	else
+		return virt_to_page(addr);
+}
+
+static unsigned persistent_memory_page_offset(void *addr)
+{
+	return (unsigned long)addr & (PAGE_SIZE - 1);
+}
+
+static void persistent_memory_flush_cache(void *ptr, size_t size)
+{
+	if (is_vmalloc_addr(ptr))
+		flush_kernel_vmap_range(ptr, size);
+}
+
+static void persistent_memory_flush(struct wc_pmem_holder *pmem_holder, void *ptr, size_t size)
+{
+	FLUSH_RANGE(pmem_holder->dev->dax_dev, ptr, size);
+}
+
+static void persistent_memory_commit_flushed(void)
+{
+	COMMIT_FLUSHED();
+}
+
+#define MEMORY_SUPERBLOCK_MAGIC		0x23489321
+#define MEMORY_SUPERBLOCK_VERSION	1
+
+struct wc_memory_entry {
+	uint64_t original_sector;
+	uint64_t seq_count;
+};
+
+struct wc_memory_superblock {
+	union {
+		struct {
+			uint32_t magic;
+			uint32_t version;
+			uint32_t block_size;
+			uint32_t pad;
+			uint64_t n_blocks;
+			uint64_t seq_count;
+		};
+		uint64_t padding[8];
+	};
+	struct wc_memory_entry entries[0];
+};
+
+struct wc_entry {
+	struct rb_node rb_node;
+	struct list_head lru;
+	unsigned short wc_list_contiguous;
+	bool write_in_progress
+#if BITS_PER_LONG == 64
+		:1
+#endif
+	;
+	unsigned long index
+#if BITS_PER_LONG == 64
+		:47
+#endif
+	;
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	uint64_t original_sector;
+	uint64_t seq_count;
+#endif
+};
+
+#ifndef DM_WRITECACHE_ONLY_SSD
+#define WC_MODE_PMEM(wc)			((wc)->pmem_mode)
+#else
+#define WC_MODE_PMEM(wc)			false
+#endif
+
+struct dm_writecache {
+#ifndef DM_WRITECACHE_ONLY_SSD
+	bool pmem_mode;
+#endif
+	struct mutex lock;
+	struct rb_root tree;
+	struct list_head lru;
+	struct list_head freelist;
+	size_t freelist_size;
+	size_t writeback_size;
+	unsigned uncommitted_blocks;
+	unsigned autocommit_blocks;
+	unsigned max_writeback_jobs;
+	size_t freelist_high_watermark;
+	size_t freelist_low_watermark;
+	struct timer_list autocommit_timer;
+	unsigned long autocommit_jiffies;
+	struct swait_queue_head freelist_wait;
+
+	struct dm_target *ti;
+	struct dm_dev *dev;
+	struct dm_dev *ssd_dev;
+	void *memory_map;
+	uint64_t memory_map_size;
+	size_t metadata_sectors;
+	void *block_start;
+	struct wc_entry *entries;
+	unsigned block_size;
+	unsigned char block_size_bits;
+	size_t n_blocks;
+	uint64_t seq_count;
+	int error;
+
+	bool overwrote_committed;
+
+	atomic_t bio_in_progress[2];
+	struct swait_queue_head bio_in_progress_wait[2];
+
+	struct dm_io_client *dm_io;
+
+	unsigned writeback_all;
+	struct workqueue_struct *writeback_wq;
+	struct work_struct writeback_work;
+	struct work_struct flush_work;
+
+	struct swait_queue_head endio_thread_wait;
+	struct list_head endio_list;
+	struct task_struct *endio_thread;
+
+	struct task_struct *flush_thread;
+	struct bio *flush_bio;
+	struct completion flush_completion;
+
+	struct bio_set *bio_set;
+	mempool_t *copy_pool;
+	mempool_t *page_pool;
+	struct wc_pmem_holder pmem_holder;
+	const char *memory_name;
+
+	struct dm_kcopyd_client *dm_kcopyd;
+	unsigned long *dirty_bitmap;
+	unsigned dirty_bitmap_size;
+
+	bool high_wm_percent_set;
+	bool low_wm_percent_set;
+	bool max_writeback_jobs_set;
+	bool autocommit_blocks_set;
+	bool autocommit_time_set;
+
+#ifdef WC_MEASURE_LATENCY
+	ktime_t lock_acquired_time;
+	ktime_t max_lock_held;
+	ktime_t max_lock_wait;
+	ktime_t max_freelist_wait;
+	ktime_t measure_latency_time;
+	ktime_t max_measure_latency;
+#endif
+};
+
+#define WB_LIST_INLINE		16
+
+struct writeback_struct {
+	struct list_head endio_entry;
+	struct dm_writecache *wc;
+	struct wc_entry **wc_list;
+	unsigned wc_list_n;
+	unsigned page_offset;
+	struct page *page;
+	struct wc_entry *wc_list_inline[WB_LIST_INLINE];
+	struct bio bio;
+};
+
+struct copy_struct {
+	struct list_head endio_entry;
+	struct dm_writecache *wc;
+	struct wc_entry *e;
+	unsigned n_entries;
+	int error;
+};
+
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(dm_writecache_throttle,
+					    "A percentage of time allocated for data copying");
+
+static inline void measure_latency_start(struct dm_writecache *wc)
+{
+#ifdef WC_MEASURE_LATENCY
+	wc->measure_latency_time = ktime_get();
+#endif
+}
+
+static inline void measure_latency_end(struct dm_writecache *wc, unsigned long n)
+{
+#ifdef WC_MEASURE_LATENCY
+	ktime_t now = ktime_get();
+	if (now - wc->measure_latency_time > wc->max_measure_latency) {
+		wc->max_measure_latency = now - wc->measure_latency_time;
+		printk(KERN_DEBUG "dm-writecache: measured latency %lld.%03lldus, %lu steps\n", wc->max_measure_latency / 1000, wc->max_measure_latency % 1000, n);
+	}
+#endif
+}
+
+static void __wc_lock(struct dm_writecache *wc, int line)
+{
+#ifdef WC_MEASURE_LATENCY
+	ktime_t before, after;
+	before = ktime_get();
+#endif
+	mutex_lock(&wc->lock);
+#ifdef WC_MEASURE_LATENCY
+	after = ktime_get();
+	if (unlikely(after - before > wc->max_lock_wait)) {
+		wc->max_lock_wait = after - before;
+		printk(KERN_DEBUG "dm-writecache: waiting for lock for %lld.%03lldus at %d\n", wc->max_lock_wait / 1000, wc->max_lock_wait % 1000, line);
+		after = ktime_get();
+	}
+	wc->lock_acquired_time = after;
+#endif
+}
+#define wc_lock(wc)	__wc_lock(wc, __LINE__)
+
+static void __wc_unlock(struct dm_writecache *wc, int line)
+{
+#ifdef WC_MEASURE_LATENCY
+	ktime_t now = ktime_get();
+	if (now - wc->lock_acquired_time > wc->max_lock_held) {
+		wc->max_lock_held = now - wc->lock_acquired_time;
+		printk(KERN_DEBUG "dm-writecache: lock held for %lld.%03lldus at %d\n", wc->max_lock_held / 1000, wc->max_lock_held % 1000, line);
+	}
+#endif
+	mutex_unlock(&wc->lock);
+}
+#define wc_unlock(wc)	__wc_unlock(wc, __LINE__)
+
+#define wc_unlock_long(wc)	mutex_unlock(&wc->lock)
+
+static struct wc_memory_superblock *sb(struct dm_writecache *wc)
+{
+	return wc->memory_map;
+}
+
+static struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e)
+{
+	if (is_power_of_2(sizeof(struct wc_entry)) && 0)
+		return &sb(wc)->entries[e - wc->entries];
+	else
+		return &sb(wc)->entries[e->index];
+}
+
+static void *memory_data(struct dm_writecache *wc, struct wc_entry *e)
+{
+	return (char *)wc->block_start + (e->index << wc->block_size_bits);
+}
+
+static sector_t cache_sector(struct dm_writecache *wc, struct wc_entry *e)
+{
+	return wc->metadata_sectors +
+		((sector_t)e->index << (wc->block_size_bits - SECTOR_SHIFT));
+}
+
+static uint64_t read_original_sector(struct dm_writecache *wc, struct wc_entry *e)
+{
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	return e->original_sector;
+#else
+	return le64_to_cpu(memory_entry(wc, e)->original_sector);
+#endif
+}
+
+static uint64_t read_seq_count(struct dm_writecache *wc, struct wc_entry *e)
+{
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	return e->seq_count;
+#else
+	return le64_to_cpu(memory_entry(wc, e)->seq_count);
+#endif
+}
+
+static void clear_seq_count(struct dm_writecache *wc, struct wc_entry *e)
+{
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	e->seq_count = -1;
+#endif
+	NT_STORE(memory_entry(wc, e)->seq_count, cpu_to_le64(-1));
+}
+
+static void write_original_sector_seq_count(struct dm_writecache *wc, struct wc_entry *e,
+					    uint64_t original_sector, uint64_t seq_count)
+{
+	struct wc_memory_entry *wme;
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	e->original_sector = original_sector;
+	e->seq_count = seq_count;
+#endif
+	wme = memory_entry(wc, e);
+	NT_STORE(wme->original_sector, cpu_to_le64(original_sector));
+	NT_STORE(wme->seq_count, cpu_to_le64(seq_count));
+}
+
+#define writecache_error(wc, err, msg, arg...)				\
+do {									\
+	if (!cmpxchg(&(wc)->error, 0, err))				\
+		DMERR(msg, ##arg);					\
+	swake_up(&(wc)->freelist_wait);					\
+} while (0)
+
+#define writecache_has_error(wc)	(unlikely(ACCESS_ONCE((wc)->error)))
+
+static void writecache_flush_all_metadata(struct dm_writecache *wc)
+{
+	if (WC_MODE_PMEM(wc)) {
+		persistent_memory_flush(&wc->pmem_holder, sb(wc),
+			offsetof(struct wc_memory_superblock, entries[wc->n_blocks]));
+	} else {
+		memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size);
+	}
+}
+
+static void writecache_flush_region(struct dm_writecache *wc, void *ptr, size_t size)
+{
+	if (WC_MODE_PMEM(wc))
+		persistent_memory_flush(&wc->pmem_holder, ptr, size);
+	else
+		__set_bit(((char *)ptr - (char *)wc->memory_map) / BITMAP_GRANULARITY,
+			  wc->dirty_bitmap);
+}
+
+static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev);
+
+struct io_notify {
+	struct dm_writecache *wc;
+	struct completion c;
+	atomic_t count;
+};
+
+void writecache_notify_io(unsigned long error, void *context)
+{
+	struct io_notify *endio = context;
+
+	if (unlikely(error != 0))
+		writecache_error(endio->wc, -EIO, "error writing metadata");
+	BUG_ON(atomic_read(&endio->count) <= 0);
+	if (atomic_dec_and_test(&endio->count))
+		complete(&endio->c);
+}
+
+static void ssd_commit_flushed(struct dm_writecache *wc)
+{
+	int r;
+	struct dm_io_region region;
+	struct dm_io_request req;
+	struct io_notify endio = {
+		wc,
+		COMPLETION_INITIALIZER_ONSTACK(endio.c),
+		ATOMIC_INIT(1),
+	};
+	unsigned bitmap_bits = wc->dirty_bitmap_size * BITS_PER_LONG;
+	unsigned i = 0;
+
+	while (1) {
+		unsigned j;
+		i = find_next_bit(wc->dirty_bitmap, bitmap_bits, i);
+		if (unlikely(i == bitmap_bits))
+			break;
+		j = find_next_zero_bit(wc->dirty_bitmap, bitmap_bits, i);
+
+		region.bdev = wc->ssd_dev->bdev;
+		region.sector = (sector_t)i * (BITMAP_GRANULARITY >> SECTOR_SHIFT);
+		region.count = (sector_t)(j - i) * (BITMAP_GRANULARITY >> SECTOR_SHIFT);
+
+		if (unlikely(region.sector >= wc->metadata_sectors))
+			break;
+		if (unlikely(region.sector + region.count > wc->metadata_sectors))
+			region.count = wc->metadata_sectors - region.sector;
+
+		atomic_inc(&endio.count);
+		req.bi_op = REQ_OP_WRITE;
+		req.bi_op_flags = REQ_SYNC;
+		req.mem.type = DM_IO_VMA;
+		req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY;
+		req.client = wc->dm_io;
+		req.notify.fn = writecache_notify_io;
+		req.notify.context = &endio;
+
+		r = dm_io(&req, 1, &region, NULL);
+		if (unlikely(r)) {
+			/*
+			 * Async dm-io (implied by notify.fn above) won't return an error, but
+			 * if that changes in the future we must catch it: so panic in defense.
+			 */
+			panic(DM_NAME ": " DM_MSG_PREFIX ": dm io error %d", r);
+		}
+		i = j;
+	}
+
+	writecache_notify_io(0, &endio);
+	wait_for_completion_io(&endio.c);
+
+	writecache_disk_flush(wc, wc->ssd_dev);
+
+	memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size);
+}
+
+static void writecache_commit_flushed(struct dm_writecache *wc)
+{
+	if (WC_MODE_PMEM(wc))
+		persistent_memory_commit_flushed();
+	else
+		ssd_commit_flushed(wc);
+}
+
+static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
+{
+	int r;
+	struct dm_io_region region;
+	struct dm_io_request req;
+
+	region.bdev = dev->bdev;
+	region.sector = 0;
+	region.count = 0;
+	req.bi_op = REQ_OP_WRITE;
+	req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC;
+	req.mem.type = DM_IO_KMEM;
+	req.mem.ptr.addr = NULL;
+	req.client = wc->dm_io;
+	req.notify.fn = NULL;
+
+	r = dm_io(&req, 1, &region, NULL);
+	if (unlikely(r))
+		writecache_error(wc, r, "error flushing metadata: %d", r);
+}
+
+static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
+{
+	swait_event(wc->bio_in_progress_wait[direction],
+		   !atomic_read(&wc->bio_in_progress[direction]));
+}
+
+#define WFE_RETURN_FOLLOWING	1
+#define WFE_LOWEST_SEQ		2
+
+static struct wc_entry *writecache_find_entry(struct dm_writecache *wc, uint64_t block, int flags)
+{
+	struct wc_entry *e;
+	struct rb_node *node = wc->tree.rb_node;
+
+	if (unlikely(!node))
+		return NULL;
+
+	while (1) {
+		e = container_of(node, struct wc_entry, rb_node);
+		if (read_original_sector(wc, e) == block)
+			break;
+		node = (read_original_sector(wc, e) >= block ?
+			e->rb_node.rb_left : e->rb_node.rb_right);
+		//prefetch(node);
+		if (unlikely(!node)) {
+			if (!(flags & WFE_RETURN_FOLLOWING)) {
+				return NULL;
+			}
+			if (read_original_sector(wc, e) >= block) {
+				break;
+			} else {
+				node = rb_next(&e->rb_node);
+				if (unlikely(!node)) {
+					return NULL;
+				}
+				e = container_of(node, struct wc_entry, rb_node);
+				break;
+			}
+		}
+	}
+
+	while (1) {
+		struct wc_entry *e2;
+		if (flags & WFE_LOWEST_SEQ)
+			node = rb_prev(&e->rb_node);
+		else
+			node = rb_next(&e->rb_node);
+		if (!node)
+			return e;
+		e2 = container_of(node, struct wc_entry, rb_node);
+		if (read_original_sector(wc, e2) != block)
+			return e;
+		e = e2;
+	}
+}
+
+static void writecache_insert_entry(struct dm_writecache *wc, struct wc_entry *ins)
+{
+	struct wc_entry *e;
+	struct rb_node **node = &wc->tree.rb_node, *parent = NULL;
+
+	while (*node) {
+		e = container_of(*node, struct wc_entry, rb_node);
+		parent = &e->rb_node;
+		node = read_original_sector(wc, e) > read_original_sector(wc, ins) ?
+			&parent->rb_left : &parent->rb_right;
+	}
+	rb_link_node(&ins->rb_node, parent, node);
+	rb_insert_color(&ins->rb_node, &wc->tree);
+	list_add(&ins->lru, &wc->lru);
+}
+
+static void writecache_unlink(struct dm_writecache *wc, struct wc_entry *e)
+{
+	list_del(&e->lru);
+	rb_erase(&e->rb_node, &wc->tree);
+}
+
+static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry *e)
+{
+	list_add_tail(&e->lru, &wc->freelist);
+	wc->freelist_size++;
+}
+
+struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc)
+{
+	struct wc_entry *e;
+
+	if (unlikely(list_empty(&wc->freelist)))
+		return NULL;
+	e = container_of(wc->freelist.next, struct wc_entry, lru);
+	list_del(&e->lru);
+	wc->freelist_size--;
+	if (unlikely(wc->freelist_size <= wc->freelist_high_watermark))
+		queue_work(wc->writeback_wq, &wc->writeback_work);
+
+	return e;
+}
+
+static void writecache_free_entry(struct dm_writecache *wc, struct wc_entry *e)
+{
+	writecache_unlink(wc, e);
+	writecache_add_to_freelist(wc, e);
+	clear_seq_count(wc, e);
+	writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry));
+	if (unlikely(swait_active(&wc->freelist_wait)))
+		swake_up(&wc->freelist_wait);
+}
+
+static void __writecache_wait_on_freelist(struct dm_writecache *wc, bool measure, int line)
+{
+	DECLARE_SWAITQUEUE(wait);
+#ifdef WC_MEASURE_LATENCY
+	ktime_t before, after;
+#endif
+
+	prepare_to_swait(&wc->freelist_wait, &wait, TASK_UNINTERRUPTIBLE);
+	wc_unlock(wc);
+#ifdef WC_MEASURE_LATENCY
+	if (measure)
+		before = ktime_get();
+#endif
+	io_schedule();
+	finish_swait(&wc->freelist_wait, &wait);
+#ifdef WC_MEASURE_LATENCY
+	if (measure) {
+		after = ktime_get();
+		if (unlikely(after - before > wc->max_freelist_wait)) {
+			wc->max_freelist_wait = after - before;
+			printk(KERN_DEBUG "dm-writecache: waiting on freelist for %lld.%03lldus at %d\n", wc->max_freelist_wait / 1000, wc->max_freelist_wait % 1000, line);
+		}
+	}
+#endif
+	wc_lock(wc);
+}
+#define writecache_wait_on_freelist(wc)		__writecache_wait_on_freelist(wc, true, __LINE__)
+#define writecache_wait_on_freelist_long(wc)	__writecache_wait_on_freelist(wc, false, __LINE__)
+
+static void writecache_poison_lists(struct dm_writecache *wc)
+{
+	/*
+	 * Catch incorrect access to these values while the device is suspended.
+	 */
+	memset(&wc->tree, -1, sizeof wc->tree);
+	wc->lru.next = LIST_POISON1;
+	wc->lru.prev = LIST_POISON2;
+	wc->freelist.next = LIST_POISON1;
+	wc->freelist.prev = LIST_POISON2;
+}
+
+static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry *e)
+{
+	writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry));
+	if (WC_MODE_PMEM(wc))
+		writecache_flush_region(wc, memory_data(wc, e), wc->block_size);
+}
+
+static bool writecache_entry_is_committed(struct dm_writecache *wc, struct wc_entry *e)
+{
+	return read_seq_count(wc, e) < wc->seq_count;
+}
+
+static void writecache_flush(struct dm_writecache *wc)
+{
+	struct wc_entry *e, *e2;
+	bool need_flush_after_free;
+
+	wc->uncommitted_blocks = 0;
+	del_timer(&wc->autocommit_timer);
+
+	if (list_empty(&wc->lru))
+		return;
+
+	e = container_of(wc->lru.next, struct wc_entry, lru);
+	if (writecache_entry_is_committed(wc, e)) {
+		if (wc->overwrote_committed) {
+			writecache_wait_for_ios(wc, WRITE);
+			writecache_disk_flush(wc, wc->ssd_dev);
+			wc->overwrote_committed = false;
+		}
+		return;
+	}
+	while (1) {
+		writecache_flush_entry(wc, e);
+		if (unlikely(e->lru.next == &wc->lru))
+			break;
+		e2 = container_of(e->lru.next, struct wc_entry, lru);
+		if (writecache_entry_is_committed(wc, e2))
+			break;
+		e = e2;
+		cond_resched();
+	}
+	writecache_commit_flushed(wc);
+
+	writecache_wait_for_ios(wc, WRITE);
+
+	wc->seq_count++;
+	NT_STORE(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
+	writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count);
+	writecache_commit_flushed(wc);
+
+	wc->overwrote_committed = false;
+
+	need_flush_after_free = false;
+	while (1) {
+		/* Free another committed entry with lower seq-count */
+		struct rb_node *rb_node = rb_prev(&e->rb_node);
+
+		if (rb_node) {
+			e2 = container_of(rb_node, struct wc_entry, rb_node);
+			if (read_original_sector(wc, e2) == read_original_sector(wc, e) &&
+			    likely(!e2->write_in_progress)) {
+				writecache_free_entry(wc, e2);
+				need_flush_after_free = true;
+			}
+		}
+		if (unlikely(e->lru.prev == &wc->lru))
+			break;
+		e = container_of(e->lru.prev, struct wc_entry, lru);
+		cond_resched();
+	}
+
+	if (need_flush_after_free)
+		writecache_commit_flushed(wc);
+}
+
+static void writecache_flush_work(struct work_struct *work)
+{
+	struct dm_writecache *wc = container_of(work, struct dm_writecache, flush_work);
+	wc_lock(wc);
+	writecache_flush(wc);
+	wc_unlock(wc);
+}
+
+static void writecache_autocommit_timer(unsigned long data)
+{
+	struct dm_writecache *wc = (struct dm_writecache *)data;
+	if (!writecache_has_error(wc))
+		queue_work(wc->writeback_wq, &wc->flush_work);
+}
+
+static void writecache_schedule_autocommit(struct dm_writecache *wc)
+{
+	if (!timer_pending(&wc->autocommit_timer))
+		mod_timer(&wc->autocommit_timer, jiffies + wc->autocommit_jiffies);
+}
+
+static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_t end)
+{
+	struct wc_entry *e;
+	bool discarded_something = false;
+
+	e = writecache_find_entry(wc, start, WFE_RETURN_FOLLOWING | WFE_LOWEST_SEQ);
+	if (unlikely(!e))
+		return;
+
+	while (read_original_sector(wc, e) < end) {
+		struct rb_node *node = rb_next(&e->rb_node);
+
+		if (likely(!e->write_in_progress)) {
+			if (!discarded_something) {
+				writecache_wait_for_ios(wc, READ);
+				writecache_wait_for_ios(wc, WRITE);
+				discarded_something = true;
+			}
+			writecache_free_entry(wc, e);
+		}
+
+		if (!node)
+			break;
+
+		e = container_of(node, struct wc_entry, rb_node);
+	}
+
+	if (discarded_something)
+		writecache_commit_flushed(wc);
+}
+
+static bool writecache_wait_for_writeback(struct dm_writecache *wc)
+{
+	if (wc->writeback_size) {
+		writecache_wait_on_freelist(wc);
+		return true;
+	}
+	return false;
+}
+
+static void writecache_suspend(struct dm_target *ti)
+{
+	struct dm_writecache *wc = ti->private;
+
+	del_timer_sync(&wc->autocommit_timer);
+
+	flush_workqueue(wc->writeback_wq);
+
+	wc_lock(wc);
+	writecache_flush(wc);
+	while (writecache_wait_for_writeback(wc));
+	while (wc->writeback_all) {
+		wc_unlock_long(wc);
+		msleep(1);
+		wc_lock(wc);
+	}
+	wc_unlock_long(wc);
+
+	writecache_poison_lists(wc);
+}
+
+static int writecache_alloc_entries(struct dm_writecache *wc)
+{
+	size_t b;
+	if (wc->entries)
+		return 0;
+	wc->entries = vmalloc(sizeof(struct wc_entry) * wc->n_blocks);
+	if (!wc->entries)
+		return -ENOMEM;
+	for (b = 0; b < wc->n_blocks; b++) {
+		struct wc_entry *e = &wc->entries[b];
+		e->index = b;
+		e->write_in_progress = false;
+	}
+	return 0;
+}
+
+static void writecache_resume(struct dm_target *ti)
+{
+	struct dm_writecache *wc = ti->private;
+	size_t b;
+	bool need_flush = false;
+	uint64_t sb_seq_count;
+	int r;
+
+	wc_lock(wc);
+
+	wc->tree = RB_ROOT;
+	INIT_LIST_HEAD(&wc->lru);
+	INIT_LIST_HEAD(&wc->freelist);
+	wc->freelist_size = 0;
+
+	r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t));
+	if (r) {
+		writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
+		sb_seq_count = cpu_to_le64(0);
+	}
+	wc->seq_count = le64_to_cpu(sb_seq_count);
+
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	for (b = 0; b < wc->n_blocks; b++) {
+		struct wc_entry *e = &wc->entries[b];
+		struct wc_memory_entry wme;
+		if (writecache_has_error(wc)) {
+			e->original_sector = -1;
+			e->seq_count = -1;
+			continue;
+		}
+		r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry));
+		if (r) {
+			writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d", (unsigned long)b, r);
+			e->original_sector = -1;
+			e->seq_count = -1;
+		} else {
+			e->original_sector = le64_to_cpu(wme.original_sector);
+			e->seq_count = le64_to_cpu(wme.seq_count);
+		}
+	}
+#endif
+	for (b = 0; b < wc->n_blocks; b++) {
+		struct wc_entry *e = &wc->entries[b];
+		if (!writecache_entry_is_committed(wc, e)) {
+			if (read_seq_count(wc, e) != -1) {
+erase_this:
+				clear_seq_count(wc, e);
+				need_flush = true;
+			}
+			writecache_add_to_freelist(wc, e);
+		} else {
+			struct wc_entry *old;
+
+			old = writecache_find_entry(wc, read_original_sector(wc, e), 0);
+			if (!old) {
+				writecache_insert_entry(wc, e);
+			} else {
+				if (read_seq_count(wc, old) == read_seq_count(wc, e)) {
+					writecache_error(wc, -EINVAL, "two identical entries, position %llu, sector %llu, sequence %llu",
+						 (unsigned long long)b, (unsigned long long)read_original_sector(wc, e),
+						 (unsigned long long)read_seq_count(wc, e));
+				}
+				if (read_seq_count(wc, old) > read_seq_count(wc, e)) {
+					goto erase_this;
+				} else {
+					writecache_free_entry(wc, old);
+					writecache_insert_entry(wc, e);
+					need_flush = true;
+				}
+			}
+		}
+		cond_resched();
+	}
+
+	if (need_flush) {
+		writecache_flush_all_metadata(wc);
+		writecache_commit_flushed(wc);
+	}
+
+	wc_unlock_long(wc);
+}
+
+static int process_flush_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+{
+	uint64_t seq;
+
+	if (argc != 1)
+		return -EINVAL;
+
+	wc_lock(wc);
+	if (dm_suspended(wc->ti)) {
+		wc_unlock(wc);
+		return -EBUSY;
+	}
+	if (writecache_has_error(wc)) {
+		wc_unlock(wc);
+		return -EIO;
+	}
+
+	writecache_flush(wc);
+	seq = wc->seq_count;
+	wc->writeback_all++;
+	wc_unlock(wc);
+
+	queue_work(wc->writeback_wq, &wc->writeback_work);
+	flush_workqueue(wc->writeback_wq);
+
+	wc_lock(wc);
+	wc->writeback_all--;
+	if (dm_suspended(wc->ti)) {
+		wc_unlock(wc);
+		return -EBUSY;
+	}
+	if (writecache_has_error(wc)) {
+		wc_unlock(wc);
+		return -EIO;
+	}
+	wc_unlock(wc);
+
+	return 0;
+}
+
+static int writecache_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+	int r = -EINVAL;
+	struct dm_writecache *wc = ti->private;
+
+	if (!strcasecmp(argv[0], "flush"))
+		r = process_flush_mesg(argc, argv, wc);
+	else
+		DMWARN("unrecognised message received: %s", argv[0]);
+
+	return r;
+}
+
+static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
+{
+	void *buf;
+	unsigned long flags;
+	unsigned size;
+	int rw = bio_data_dir(bio);
+	unsigned remaining_size = wc->block_size;
+
+	do {
+		buf = bio_kmap_irq(bio, &flags, &size);
+		if (unlikely(size > remaining_size))
+			size = remaining_size;
+
+		if (rw == READ) {
+			int r;
+			r = memcpy_mcsafe(buf, data, size);
+			flush_dcache_page(bio_page(bio));
+			if (unlikely(r)) {
+				writecache_error(wc, r, "hardware memory error when reading data: %d", r);
+				bio->bi_status = BLK_STS_IOERR;
+			}
+		} else {
+			flush_dcache_page(bio_page(bio));
+			memcpy_flushcache(data, buf, size);
+		}
+
+		bio_kunmap_irq(buf, &flags);
+
+		data = (char *)data + size;
+		remaining_size -= size;
+		bio_advance(bio, size);
+	} while (unlikely(remaining_size));
+}
+
+static int writecache_flush_thread(void *data)
+{
+	struct dm_writecache *wc = data;
+
+	while (!kthread_should_stop()) {
+		struct bio *bio;
+
+		bio = wc->flush_bio;
+		if (unlikely(!bio)) {
+		} else if (bio_op(bio) == REQ_OP_DISCARD) {
+			writecache_discard(wc, bio->bi_iter.bi_sector,
+					   bio->bi_iter.bi_sector + (bio->bi_iter.bi_size >> SECTOR_SHIFT));
+		} else {
+			writecache_flush(wc);
+		}
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		wc->flush_bio = (void *)0x600 + POISON_POINTER_DELTA;	/* for debugging - catch uninitialized use */
+		complete(&wc->flush_completion);
+
+		schedule();
+	}
+
+	set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio)
+{
+	wc->flush_bio = bio;
+	reinit_completion(&wc->flush_completion);
+	wake_up_process(wc->flush_thread);
+	wait_for_completion_io(&wc->flush_completion);
+}
+
+static int writecache_map(struct dm_target *ti, struct bio *bio)
+{
+	struct wc_entry *e;
+	struct dm_writecache *wc = ti->private;
+
+	bio->bi_private = NULL;
+
+	wc_lock(wc);
+
+	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
+		if (writecache_has_error(wc))
+			goto unlock_error;
+		if (WC_MODE_PMEM(wc))
+			writecache_flush(wc);
+		else
+			writecache_offload_bio(wc, bio);
+		goto unlock_ok_flush;
+	}
+
+	bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
+
+	if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
+				(wc->block_size / 512 - 1)) != 0)) {
+		DMWARN("I/O is not aligned, sector %llu, size %u, block size %u",
+			(unsigned long long)bio->bi_iter.bi_sector,
+			bio->bi_iter.bi_size, wc->block_size);
+		goto unlock_error;
+	}
+
+	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+		if (writecache_has_error(wc))
+			goto unlock_error;
+		if (WC_MODE_PMEM(wc))
+			writecache_discard(wc, bio->bi_iter.bi_sector,
+					   bio->bi_iter.bi_sector + (bio->bi_iter.bi_size >> SECTOR_SHIFT));
+		else
+			writecache_offload_bio(wc, bio);
+		goto unlock_remap_origin;
+	}
+
+	if (bio_data_dir(bio) == READ) {
+next_block:
+		e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
+		if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) {
+			if (WC_MODE_PMEM(wc)) {
+				bio_copy_block(wc, bio, memory_data(wc, e));
+				if (bio->bi_iter.bi_size)
+					goto next_block;
+				goto unlock_ok_read;
+			} else {
+				dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
+				bio_set_dev(bio, wc->ssd_dev->bdev);
+				bio->bi_iter.bi_sector = cache_sector(wc, e);
+				if (!writecache_entry_is_committed(wc, e))
+					writecache_wait_for_ios(wc, WRITE);
+				goto unlock_remap;
+			}
+		} else {
+			if (e) {
+				sector_t next_boundary =
+					read_original_sector(wc, e) - bio->bi_iter.bi_sector;
+				if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) {
+					dm_accept_partial_bio(bio, next_boundary);
+				}
+			}
+			goto unlock_remap_origin;
+		}
+	} else {
+		do {
+			if (writecache_has_error(wc))
+				goto unlock_error;
+			e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
+			if (e) {
+				if (!writecache_entry_is_committed(wc, e))
+					goto bio_copy;
+				if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
+					wc->overwrote_committed = true;
+					goto bio_copy;
+				}
+			}
+			e = writecache_pop_from_freelist(wc);
+			if (unlikely(!e)) {
+				writecache_wait_on_freelist(wc);
+				continue;
+			}
+			write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count);
+			writecache_insert_entry(wc, e);
+			wc->uncommitted_blocks++;
+bio_copy:
+			if (WC_MODE_PMEM(wc)) {
+				bio_copy_block(wc, bio, memory_data(wc, e));
+			} else {
+				dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
+				bio_set_dev(bio, wc->ssd_dev->bdev);
+				bio->bi_iter.bi_sector = cache_sector(wc, e);
+				if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
+					wc->uncommitted_blocks = 0;
+					queue_work(wc->writeback_wq, &wc->flush_work);
+				} else {
+					writecache_schedule_autocommit(wc);
+				}
+				goto unlock_remap;
+			}
+		} while (bio->bi_iter.bi_size);
+
+		if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
+			writecache_flush(wc);
+		} else {
+			writecache_schedule_autocommit(wc);
+		}
+
+		goto unlock_ok_write;
+	}
+
+unlock_remap_origin:
+	bio_set_dev(bio, wc->dev->bdev);
+	wc_unlock(wc);
+	return DM_MAPIO_REMAPPED;
+
+unlock_remap:
+	bio->bi_private = (void *)1;	/* make sure that writecache_end_io decrements bio_in_progress */
+	atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);
+	wc_unlock(wc);
+	return DM_MAPIO_REMAPPED;
+
+unlock_ok_flush:
+#ifdef WC_MEASURE_LATENCY
+	wc_unlock(wc);
+	bio_endio(bio);
+	return DM_MAPIO_SUBMITTED;
+#endif
+
+unlock_ok_read:
+#ifdef WC_MEASURE_LATENCY
+	wc_unlock(wc);
+	bio_endio(bio);
+	return DM_MAPIO_SUBMITTED;
+#endif
+
+unlock_ok_write:
+	wc_unlock(wc);
+	bio_endio(bio);
+	return DM_MAPIO_SUBMITTED;
+
+unlock_error:
+	wc_unlock(wc);
+	bio_io_error(bio);
+	return DM_MAPIO_SUBMITTED;
+}
+
+static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
+{
+	struct dm_writecache *wc = ti->private;
+
+	if (bio->bi_private != NULL) {
+		int dir = bio_data_dir(bio);
+		if (atomic_dec_and_test(&wc->bio_in_progress[dir]))
+			if (unlikely(swait_active(&wc->bio_in_progress_wait[dir])))
+				swake_up(&wc->bio_in_progress_wait[dir]);
+	}
+	return 0;
+}
+
+static int writecache_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
+{
+	struct dm_writecache *wc = ti->private;
+
+	return fn(ti, wc->dev, 0, ti->len, data);
+}
+
+static void writecache_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+	struct dm_writecache *wc = ti->private;
+
+	if (limits->logical_block_size < wc->block_size)
+		limits->logical_block_size = wc->block_size;
+
+	if (limits->physical_block_size < wc->block_size)
+		limits->physical_block_size = wc->block_size;
+
+	if (limits->io_min < wc->block_size)
+		limits->io_min = wc->block_size;
+}
+
+
+static void writecache_writeback_endio(struct bio *bio)
+{
+	struct writeback_struct *wb = container_of(bio, struct writeback_struct, bio);
+	struct dm_writecache *wc = wb->wc;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&wc->endio_thread_wait.lock, flags);
+	list_add_tail(&wb->endio_entry, &wc->endio_list);
+	swake_up_locked(&wc->endio_thread_wait);
+	raw_spin_unlock_irqrestore(&wc->endio_thread_wait.lock, flags);
+}
+
+static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
+{
+	struct copy_struct *c = ptr;
+	struct dm_writecache *wc = c->wc;
+
+	c->error = likely(!(read_err | write_err)) ? 0 : -EIO;
+
+	raw_spin_lock_irq(&wc->endio_thread_wait.lock);
+	list_add_tail(&c->endio_entry, &wc->endio_list);
+	swake_up_locked(&wc->endio_thread_wait);
+	raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+}
+
+static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)
+{
+	unsigned i;
+	struct writeback_struct *wb;
+	struct wc_entry *e;
+	unsigned long n_walked = 0;
+
+	do {
+		wb = list_entry(list->next, struct writeback_struct, endio_entry);
+		list_del(&wb->endio_entry);
+
+		if (!wc->pmem_holder.page_backed) {
+			struct bio_vec *bv;
+			bio_for_each_segment_all(bv, &wb->bio, i)
+				mempool_free(bv->bv_page, wc->page_pool);
+		}
+
+		if (unlikely(wb->bio.bi_status != BLK_STS_OK))
+			writecache_error(wc, blk_status_to_errno(wb->bio.bi_status),
+					"write error %d", wb->bio.bi_status);
+		i = 0;
+		do {
+			e = wb->wc_list[i];
+			BUG_ON(!e->write_in_progress);
+			e->write_in_progress = false;
+			INIT_LIST_HEAD(&e->lru);
+			if (!writecache_has_error(wc))
+				writecache_free_entry(wc, e);
+			BUG_ON(!wc->writeback_size);
+			wc->writeback_size--;
+			n_walked++;
+			if (unlikely(n_walked >= ENDIO_LATENCY)) {
+				writecache_commit_flushed(wc);
+				wc_unlock(wc);
+				wc_lock(wc);
+				n_walked = 0;
+			}
+		} while (++i < wb->wc_list_n);
+
+		if (wb->wc_list != wb->wc_list_inline)
+			kfree(wb->wc_list);
+		bio_put(&wb->bio);
+	} while (!list_empty(list));
+}
+
+static void __writecache_endio_ssd(struct dm_writecache *wc, struct list_head *list)
+{
+	struct copy_struct *c;
+	struct wc_entry *e;
+
+	do {
+		c = list_entry(list->next, struct copy_struct, endio_entry);
+		list_del(&c->endio_entry);
+
+		if (unlikely(c->error))
+			writecache_error(wc, c->error, "copy error");
+
+		e = c->e;
+		do {
+			BUG_ON(!e->write_in_progress);
+			e->write_in_progress = false;
+			INIT_LIST_HEAD(&e->lru);
+			if (!writecache_has_error(wc))
+				writecache_free_entry(wc, e);
+
+			BUG_ON(!wc->writeback_size);
+			wc->writeback_size--;
+			e++;
+		} while (--c->n_entries);
+		mempool_free(c, wc->copy_pool);
+	} while (!list_empty(list));
+}
+
+static int writecache_endio_thread(void *data)
+{
+	struct dm_writecache *wc = data;
+
+	while (1) {
+		DECLARE_SWAITQUEUE(wait);
+		struct list_head list;
+
+		raw_spin_lock_irq(&wc->endio_thread_wait.lock);
+continue_locked:
+		if (!list_empty(&wc->endio_list))
+			goto pop_from_list;
+		set_current_state(TASK_INTERRUPTIBLE);
+		__prepare_to_swait(&wc->endio_thread_wait, &wait);
+		raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+
+		if (unlikely(kthread_should_stop())) {
+			finish_swait(&wc->endio_thread_wait, &wait);
+			break;
+		}
+
+		schedule();
+
+		raw_spin_lock_irq(&wc->endio_thread_wait.lock);
+		__finish_swait(&wc->endio_thread_wait, &wait);
+		goto continue_locked;
+
+pop_from_list:
+		list = wc->endio_list;
+		list.next->prev = list.prev->next = &list;
+		INIT_LIST_HEAD(&wc->endio_list);
+		raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+
+		writecache_disk_flush(wc, wc->dev);
+
+		wc_lock(wc);
+
+		if (WC_MODE_PMEM(wc)) {
+			__writecache_endio_pmem(wc, &list);
+		} else {
+			__writecache_endio_ssd(wc, &list);
+			writecache_wait_for_ios(wc, READ);
+		}
+
+		writecache_commit_flushed(wc);
+
+		wc_unlock(wc);
+	}
+
+	return 0;
+}
+
+static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e, gfp_t gfp)
+{
+	struct dm_writecache *wc = wb->wc;
+	unsigned block_size = wc->block_size;
+	int r;
+
+	if (wc->pmem_holder.page_backed) {
+		void *address = memory_data(wc, e);
+		persistent_memory_flush_cache(address, block_size);
+		return bio_add_page(&wb->bio, persistent_memory_page(address),
+				    block_size, persistent_memory_page_offset(address)) != 0;
+	}
+
+	if (wb->page_offset == PAGE_SIZE) {
+		wb->page = mempool_alloc(wc->page_pool, gfp);
+		if (unlikely(!wb->page))
+			return false;
+		wb->page_offset = 0;
+	}
+
+#if 1
+	r = memcpy_mcsafe((char *)page_address(wb->page) + wb->page_offset, memory_data(wc, e), block_size);
+	if (unlikely(r))
+		writecache_error(wc, r, "hardware memory error when attempting to writeback block %lu: %d", (unsigned long)e->index, r);
+#else
+	memcpy_flushcache((char *)page_address(wb->page) + wb->page_offset, memory_data(wc, e), block_size);
+#endif
+
+	if (unlikely(!bio_add_page(&wb->bio, wb->page, block_size, wb->page_offset))) {
+		if (!wb->page_offset) {
+			mempool_free(wb->page, wc->page_pool);
+			wb->page_offset = PAGE_SIZE;
+		}
+		return false;
+	}
+	wb->page_offset += block_size;
+	return true;
+}
+
+struct writeback_list {
+	struct list_head list;
+	size_t size;
+};
+
+static void __writeback_throttle(struct dm_writecache *wc, struct writeback_list *wbl)
+{
+	if (ACCESS_ONCE(wc->writeback_size) - wbl->size >= wc->max_writeback_jobs) {
+		wc_lock(wc);
+		while (wc->writeback_size - wbl->size >= wc->max_writeback_jobs) {
+			writecache_wait_on_freelist_long(wc);
+		}
+		wc_unlock(wc);
+	}
+	cond_resched();
+}
+
+static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeback_list *wbl)
+{
+	struct wc_entry *e, *f;
+	struct bio *bio;
+	struct writeback_struct *wb;
+	unsigned max_pages;
+
+	while (wbl->size) {
+		wbl->size--;
+		e = container_of(wbl->list.prev, struct wc_entry, lru);
+		list_del(&e->lru);
+
+		max_pages = e->wc_list_contiguous;
+
+		bio = bio_alloc_bioset(GFP_NOIO, max_pages, wc->bio_set);
+		wb = container_of(bio, struct writeback_struct, bio);
+		wb->wc = wc;
+		wb->bio.bi_end_io = writecache_writeback_endio;
+		bio_set_dev(&wb->bio, wc->dev->bdev);
+		wb->bio.bi_iter.bi_sector = read_original_sector(wc, e);
+		wb->page_offset = PAGE_SIZE;
+		if (max_pages > WB_LIST_INLINE) {
+			wb->wc_list = kmalloc(max_pages * sizeof(struct wc_entry *),
+					      GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			if (unlikely(!wb->wc_list))
+				goto use_inline_list;
+		} else {
+use_inline_list:
+			wb->wc_list = wb->wc_list_inline;
+			max_pages = WB_LIST_INLINE;
+		}
+
+		BUG_ON(!wc_add_block(wb, e, GFP_NOIO));
+
+		wb->wc_list[0] = e;
+		wb->wc_list_n = 1;
+
+		while (wbl->size && wb->wc_list_n < max_pages) {
+			f = container_of(wbl->list.prev, struct wc_entry, lru);
+			if (read_original_sector(wc, f) !=
+			    read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
+				break;
+			if (!wc_add_block(wb, f, GFP_NOWAIT | __GFP_NOWARN))
+				break;
+			wbl->size--;
+			list_del(&f->lru);
+			wb->wc_list[wb->wc_list_n++] = f;
+			e = f;
+		}
+		bio_set_op_attrs(&wb->bio, REQ_OP_WRITE, 0);
+		if (writecache_has_error(wc)) {
+			bio->bi_status = BLK_STS_IOERR;
+			bio_endio(&wb->bio);
+		} else {
+			submit_bio(&wb->bio);
+		}
+
+		__writeback_throttle(wc, wbl);
+
+	}
+}
+
+static void __writecache_writeback_ssd(struct dm_writecache *wc, struct writeback_list *wbl)
+{
+	struct wc_entry *e, *f;
+	struct dm_io_region from, to;
+	struct copy_struct *c;
+
+	while (wbl->size) {
+		unsigned n_sectors;
+
+		wbl->size--;
+		e = container_of(wbl->list.prev, struct wc_entry, lru);
+		list_del(&e->lru);
+
+		n_sectors = e->wc_list_contiguous << (wc->block_size_bits - SECTOR_SHIFT);
+
+		from.bdev = wc->ssd_dev->bdev;
+		from.sector = cache_sector(wc, e);
+		from.count = n_sectors;
+		to.bdev = wc->dev->bdev;
+		to.sector = read_original_sector(wc, e);
+		to.count = n_sectors;
+
+		c = mempool_alloc(wc->copy_pool, GFP_NOIO);
+		c->wc = wc;
+		c->e = e;
+		c->n_entries = e->wc_list_contiguous;
+
+		while ((n_sectors -= wc->block_size >> SECTOR_SHIFT)) {
+			wbl->size--;
+			f = container_of(wbl->list.prev, struct wc_entry, lru);
+			BUG_ON(f != e + 1);
+			list_del(&f->lru);
+			e = f;
+		}
+
+		dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c);
+
+		__writeback_throttle(wc, wbl);
+	}
+}
+
+static void writecache_writeback(struct work_struct *work)
+{
+	struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work);
+	struct blk_plug plug;
+	struct wc_entry *e, *f, *g;
+	struct rb_node *node, *next_node;
+	struct list_head skipped;
+	struct writeback_list wbl;
+	unsigned long n_walked;
+
+	wc_lock(wc);
+restart:
+	if (writecache_has_error(wc) || unlikely(dm_suspended(wc->ti))) {
+		wc_unlock(wc);
+		return;
+	}
+
+	if (unlikely(wc->writeback_all)) {
+		if (writecache_wait_for_writeback(wc))
+			goto restart;
+	}
+
+	if (wc->overwrote_committed) {
+		writecache_wait_for_ios(wc, WRITE);
+	}
+
+	n_walked = 0;
+	INIT_LIST_HEAD(&skipped);
+	INIT_LIST_HEAD(&wbl.list);
+	wbl.size = 0;
+	while (!list_empty(&wc->lru) &&
+	       (wc->writeback_all ||
+		wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) {
+
+		n_walked++;
+		if (unlikely(n_walked > WRITEBACK_LATENCY) && likely(!wc->writeback_all)) {
+			queue_work(wc->writeback_wq, &wc->writeback_work);
+			break;
+		}
+
+		e = container_of(wc->lru.prev, struct wc_entry, lru);
+		BUG_ON(e->write_in_progress);
+		if (unlikely(!writecache_entry_is_committed(wc, e))) {
+			writecache_flush(wc);
+		}
+		node = rb_prev(&e->rb_node);
+		if (node) {
+			f = container_of(node, struct wc_entry, rb_node);
+			if (unlikely(read_original_sector(wc, f) ==
+				     read_original_sector(wc, e))) {
+				BUG_ON(!f->write_in_progress);
+				list_del(&e->lru);
+				list_add(&e->lru, &skipped);
+				cond_resched();
+				continue;
+			}
+		}
+		wc->writeback_size++;
+		list_del(&e->lru);
+		list_add(&e->lru, &wbl.list);
+		wbl.size++;
+		e->write_in_progress = true;
+		e->wc_list_contiguous = 1;
+
+		f = e;
+
+		while (1) {
+			next_node = rb_next(&f->rb_node);
+			if (unlikely(!next_node))
+				break;
+			g = container_of(next_node, struct wc_entry, rb_node);
+			if (read_original_sector(wc, g) ==
+			    read_original_sector(wc, f)) {
+				f = g;
+				continue;
+			}
+			if (read_original_sector(wc, g) !=
+			    read_original_sector(wc, f) + (wc->block_size >> SECTOR_SHIFT))
+				break;
+			if (unlikely(g->write_in_progress))
+				break;
+			if (unlikely(!writecache_entry_is_committed(wc, g)))
+				break;
+
+			if (!WC_MODE_PMEM(wc)) {
+				if (g != f + 1)
+					break;
+			}
+
+			n_walked++;
+			//if (unlikely(n_walked > WRITEBACK_LATENCY) && likely(!wc->writeback_all))
+			//	break;
+
+			wc->writeback_size++;
+			list_del(&g->lru);
+			list_add(&g->lru, &wbl.list);
+			wbl.size++;
+			g->write_in_progress = true;
+			g->wc_list_contiguous = BIO_MAX_PAGES;
+			f = g;
+			e->wc_list_contiguous++;
+			if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES))
+				break;
+		}
+		cond_resched();
+	}
+
+	if (!list_empty(&skipped)) {
+		list_splice_tail(&skipped, &wc->lru);
+		/*
+		 * If we didn't do any progress, we must wait until some
+		 * writeback finishes to avoid burning CPU in a loop
+		 */
+		if (unlikely(!wbl.size))
+			writecache_wait_for_writeback(wc);
+	}
+
+	wc_unlock(wc);
+
+	blk_start_plug(&plug);
+
+	if (WC_MODE_PMEM(wc))
+		__writecache_writeback_pmem(wc, &wbl);
+	else
+		__writecache_writeback_ssd(wc, &wbl);
+
+	blk_finish_plug(&plug);
+
+	if (unlikely(wc->writeback_all)) {
+		wc_lock(wc);
+		while (writecache_wait_for_writeback(wc));
+		wc_unlock(wc);
+	}
+}
+
+static int calculate_memory_size(uint64_t device_size, unsigned block_size,
+				 size_t *n_blocks_p, size_t *n_metadata_blocks_p)
+{
+	uint64_t n_blocks, offset;
+	struct wc_entry e;
+
+	n_blocks = device_size;
+	do_div(n_blocks, block_size + sizeof(struct wc_memory_entry));
+
+	while (1) {
+		if (!n_blocks)
+			return -ENOSPC;
+		/* Verify the following entries[n_blocks] won't overflow */
+		if (n_blocks >= (size_t)-sizeof(struct wc_memory_superblock) / sizeof(struct wc_memory_entry))
+			return -EFBIG;
+		offset = offsetof(struct wc_memory_superblock, entries[n_blocks]);
+		offset = (offset + block_size - 1) & ~(uint64_t)(block_size - 1);
+		if (offset + n_blocks * block_size <= device_size)
+			break;
+		n_blocks--;
+	}
+
+	/* check if the bit field overflows */
+	e.index = n_blocks;
+	if (e.index != n_blocks)
+		return -EFBIG;
+
+	if (n_blocks_p)
+		*n_blocks_p = n_blocks;
+	if (n_metadata_blocks_p)
+		*n_metadata_blocks_p = offset >> __ffs(block_size);
+	return 0;
+}
+
+static int init_memory(struct dm_writecache *wc)
+{
+	size_t b;
+	int r;
+
+	r = calculate_memory_size(wc->memory_map_size, wc->block_size, &wc->n_blocks, NULL);
+	if (r)
+		return r;
+
+	r = writecache_alloc_entries(wc);
+	if (r)
+		return r;
+
+	for (b = 0; b < ARRAY_SIZE(sb(wc)->padding); b++)
+		NT_STORE(sb(wc)->padding[b], cpu_to_le64(0));
+	NT_STORE(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION));
+	NT_STORE(sb(wc)->block_size, cpu_to_le32(wc->block_size));
+	NT_STORE(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks));
+	NT_STORE(sb(wc)->seq_count, cpu_to_le64(0));
+
+	for (b = 0; b < wc->n_blocks; b++)
+		write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
+
+	writecache_flush_all_metadata(wc);
+	writecache_commit_flushed(wc);
+	NT_STORE(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
+	writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic);
+	writecache_commit_flushed(wc);
+
+	return 0;
+}
+
+static void writecache_dtr(struct dm_target *ti)
+{
+	struct dm_writecache *wc = ti->private;
+
+	if (!wc)
+		return;
+
+	if (wc->endio_thread)
+		kthread_stop(wc->endio_thread);
+
+	if (wc->flush_thread)
+		kthread_stop(wc->flush_thread);
+
+	mempool_destroy(wc->page_pool);
+
+	if (wc->bio_set)
+		bioset_free(wc->bio_set);
+
+	mempool_destroy(wc->copy_pool);
+
+	if (wc->writeback_wq)
+		destroy_workqueue(wc->writeback_wq);
+
+	if (wc->dev)
+		dm_put_device(ti, wc->dev);
+
+	if (wc->ssd_dev)
+		dm_put_device(ti, wc->ssd_dev);
+
+	if (wc->entries)
+		vfree(wc->entries);
+
+	if (wc->memory_map) {
+		if (WC_MODE_PMEM(wc))
+			persistent_memory_release(ti, &wc->pmem_holder,
+						  wc->memory_map, wc->memory_map_size);
+		else
+			vfree(wc->memory_map);
+	}
+
+	kfree(wc->memory_name);
+
+	if (wc->dm_kcopyd)
+		dm_kcopyd_client_destroy(wc->dm_kcopyd);
+
+	if (wc->dm_io)
+		dm_io_client_destroy(wc->dm_io);
+
+	if (wc->dirty_bitmap)
+		vfree(wc->dirty_bitmap);
+
+	kfree(wc);
+}
+
+static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+	struct dm_writecache *wc;
+	struct dm_arg_set as;
+	const char *string;
+	unsigned opt_params;
+	size_t offset, data_size;
+	int i, r;
+	char dummy;
+	int high_wm_percent = HIGH_WATERMARK;
+	int low_wm_percent = LOW_WATERMARK;
+	uint64_t x;
+	struct wc_memory_superblock s;
+
+	static struct dm_arg _args[] = {
+		{0, 10, "Invalid number of feature args"},
+	};
+
+	as.argc = argc;
+	as.argv = argv;
+
+	wc = kzalloc(sizeof(struct dm_writecache), GFP_KERNEL);
+	if (!wc) {
+		ti->error = "Cannot allocate writecache structure";
+		r = -ENOMEM;
+		goto bad;
+	}
+	ti->private = wc;
+	wc->ti = ti;
+
+	mutex_init(&wc->lock);
+	writecache_poison_lists(wc);
+	init_swait_queue_head(&wc->freelist_wait);
+	setup_timer(&wc->autocommit_timer, writecache_autocommit_timer, (unsigned long)wc);
+
+	for (i = 0; i < 2; i++) {
+		atomic_set(&wc->bio_in_progress[i], 0);
+		init_swait_queue_head(&wc->bio_in_progress_wait[i]);
+	}
+
+	wc->dm_io = dm_io_client_create();
+	if (!wc->dm_io) {
+		r = -ENOMEM;
+		ti->error = "Unable to allocate dm-io client";
+		goto bad;
+	}
+
+	wc->writeback_wq = alloc_workqueue("writecache-writeabck", WQ_MEM_RECLAIM, 1);
+	if (!wc->writeback_wq) {
+		r = -ENOMEM;
+		ti->error = "Could not allocate writeback workqueue";
+		goto bad;
+	}
+	INIT_WORK(&wc->writeback_work, writecache_writeback);
+	INIT_WORK(&wc->flush_work, writecache_flush_work);
+
+	init_swait_queue_head(&wc->endio_thread_wait);
+	INIT_LIST_HEAD(&wc->endio_list);
+	wc->endio_thread = kthread_create(writecache_endio_thread, wc, "writecache_endio");
+	if (IS_ERR(wc->endio_thread)) {
+		r = PTR_ERR(wc->endio_thread);
+		wc->endio_thread = NULL;
+		ti->error = "Couldn't spawn endio thread";
+		goto bad;
+	}
+	wake_up_process(wc->endio_thread);
+
+	/*
+	 * Parse the mode (pmem or ssd)
+	 */
+	string = dm_shift_arg(&as);
+	if (!string)
+		goto bad_arguments;
+
+	if (!strcasecmp(string, "s")) {
+#ifndef DM_WRITECACHE_ONLY_SSD
+		wc->pmem_mode = false;
+#endif
+	} else if (!strcasecmp(string, "p")) {
+#ifndef DM_WRITECACHE_ONLY_SSD
+		wc->pmem_mode = true;
+#else
+		r = -EOPNOTSUPP;
+		ti->error = "Persistent memory not supported on this architecture";
+		goto bad;
+#endif
+	} else {
+		goto bad_arguments;
+	}
+
+	if (WC_MODE_PMEM(wc)) {
+		wc->bio_set = bioset_create(BIO_POOL_SIZE,
+					    offsetof(struct writeback_struct, bio),
+					    BIOSET_NEED_BVECS);
+		if (!wc->bio_set) {
+			r = -ENOMEM;
+			ti->error = "Could not allocate bio set";
+			goto bad;
+		}
+	} else {
+		wc->copy_pool = mempool_create_kmalloc_pool(1, sizeof(struct copy_struct));
+		if (!wc->copy_pool) {
+			r = -ENOMEM;
+			ti->error = "Could not allocate mempool";
+			goto bad;
+		}
+	}
+
+	/*
+	 * Parse the origin data device
+	 */
+	string = dm_shift_arg(&as);
+	if (!string)
+		goto bad_arguments;
+	r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->dev);
+	if (r) {
+		ti->error = "Origin data device lookup failed";
+		goto bad;
+	}
+
+	/*
+	 * Parse cache data device (be it pmem or ssd)
+	 */
+	string = dm_shift_arg(&as);
+	if (!string)
+		goto bad_arguments;
+	if (WC_MODE_PMEM(wc)) {
+		wc->memory_name = kstrdup(string, GFP_KERNEL);
+		if (!wc->memory_name) {
+			r = -ENOMEM;
+			ti->error = "Could not allocate string";
+			goto bad;
+		}
+
+		r = persistent_memory_claim(ti, wc->memory_name, &wc->pmem_holder,
+					    &wc->memory_map, &wc->memory_map_size);
+		if (r) {
+			ti->error = "Unable to map persistent memory for cache";
+			goto bad;
+		}
+
+		if (!wc->pmem_holder.page_backed) {
+			wc->page_pool = mempool_create_page_pool(BIO_POOL_SIZE, 0);
+			if (!wc->page_pool) {
+				r = -ENOMEM;
+				ti->error = "Could not allocate page mempool";
+				goto bad;
+			}
+		}
+	} else {
+		r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->ssd_dev);
+		if (r) {
+			ti->error = "Cache data device lookup failed";
+			goto bad;
+		}
+		wc->memory_map_size = i_size_read(wc->ssd_dev->bdev->bd_inode);
+	}
+
+	/*
+	 * Parse the cache block size
+	 */
+	string = dm_shift_arg(&as);
+	if (!string)
+		goto bad_arguments;
+	if (sscanf(string, "%u%c", &wc->block_size, &dummy) != 1 ||
+	    wc->block_size < 512 || wc->block_size > PAGE_SIZE ||
+	    (wc->block_size & (wc->block_size - 1))) {
+		r = -EINVAL;
+		ti->error = "Invalid block size";
+		goto bad;
+	}
+	wc->block_size_bits = __ffs(wc->block_size);
+
+	wc->max_writeback_jobs = MAX_WRITEBACK_JOBS;
+	wc->autocommit_blocks = !WC_MODE_PMEM(wc) ? AUTOCOMMIT_BLOCKS_SSD : AUTOCOMMIT_BLOCKS_PMEM;
+	wc->autocommit_jiffies = msecs_to_jiffies(AUTOCOMMIT_MSEC);
+
+	/*
+	 * Parse optional arguments
+	 */
+	r = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
+	if (r)
+		goto bad;
+
+	while (opt_params) {
+		string = dm_shift_arg(&as), opt_params--;
+		if (!strcasecmp(string, "high_watermark") && opt_params >= 1) {
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%d%%%c", &high_wm_percent, &dummy) != 1)
+				goto invalid_optional;
+			if (high_wm_percent < 0 || high_wm_percent > 100)
+				goto invalid_optional;
+			wc->high_wm_percent_set = true;
+		} else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) {
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%d%%%c", &low_wm_percent, &dummy) != 1)
+				goto invalid_optional;
+			if (low_wm_percent < 0 || low_wm_percent > 100)
+				goto invalid_optional;
+			wc->low_wm_percent_set = true;
+		} else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) {
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%u%c", &wc->max_writeback_jobs, &dummy) != 1)
+				goto invalid_optional;
+			if (!wc->max_writeback_jobs)
+				goto invalid_optional;
+			wc->max_writeback_jobs_set = true;
+		} else if (!strcasecmp(string, "autocommit_blocks") && opt_params >= 1) {
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%u%c", &wc->autocommit_blocks, &dummy) != 1)
+				goto invalid_optional;
+			wc->autocommit_blocks_set = true;
+		} else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) {
+			unsigned autocommit_msecs;
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1)
+				goto invalid_optional;
+			if (autocommit_msecs > 3600000)
+				goto invalid_optional;
+			wc->autocommit_jiffies = jiffies_to_msecs(autocommit_msecs);
+			wc->autocommit_time_set = true;
+		} else {
+invalid_optional:
+			r = -EINVAL;
+			ti->error = "Invalid optional argument";
+			goto bad;
+		}
+	}
+
+	if (!WC_MODE_PMEM(wc)) {
+		struct dm_io_region region;
+		struct dm_io_request req;
+		size_t n_blocks, n_metadata_blocks;
+		uint64_t n_bitmap_bits;
+
+		init_completion(&wc->flush_completion);
+		wc->flush_thread = kthread_create(writecache_flush_thread, wc, "writecache_flush");
+		if (IS_ERR(wc->flush_thread)) {
+			r = PTR_ERR(wc->flush_thread);
+			wc->flush_thread = NULL;
+			ti->error = "Couldn't spawn endio thread";
+			goto bad;
+		}
+		writecache_offload_bio(wc, NULL);
+
+		r = calculate_memory_size(wc->memory_map_size, wc->block_size,
+					  &n_blocks, &n_metadata_blocks);
+		if (r) {
+			ti->error = "Invalid device size";
+			goto bad;
+		}
+
+		n_bitmap_bits = (((uint64_t)n_metadata_blocks << wc->block_size_bits) + BITMAP_GRANULARITY - 1) / BITMAP_GRANULARITY;
+		/* this is limitation of test_bit functions */
+		if (n_bitmap_bits > 1U << 31) {
+			r = -EFBIG;
+			ti->error = "Invalid device size";
+		}
+
+		wc->memory_map = vmalloc(n_metadata_blocks << wc->block_size_bits);
+		if (!wc->memory_map) {
+			r = -ENOMEM;
+			ti->error = "Unable to allocate memory for metadata";
+			goto bad;
+		}
+
+		wc->dm_kcopyd = dm_kcopyd_client_create(&dm_kcopyd_throttle);
+		if (!wc->dm_kcopyd) {
+			r = -ENOMEM;
+			ti->error = "Unable to allocate dm-kcopyd client";
+			goto bad;
+		}
+
+		wc->metadata_sectors = n_metadata_blocks << (wc->block_size_bits - SECTOR_SHIFT);
+		wc->dirty_bitmap_size = (n_bitmap_bits + BITS_PER_LONG - 1) / BITS_PER_LONG * sizeof(unsigned long);
+		wc->dirty_bitmap = vzalloc(wc->dirty_bitmap_size);
+		if (!wc->dirty_bitmap) {
+			r = -ENOMEM;
+			ti->error = "Unable to allocate dirty bitmap";
+			goto bad;
+		}
+
+		region.bdev = wc->ssd_dev->bdev;
+		region.sector = 0;
+		region.count = wc->metadata_sectors;
+		req.bi_op = REQ_OP_READ;
+		req.bi_op_flags = REQ_SYNC;
+		req.mem.type = DM_IO_VMA;
+		req.mem.ptr.vma = (char *)wc->memory_map;
+		req.client = wc->dm_io;
+		req.notify.fn = NULL;
+
+		r = dm_io(&req, 1, &region, NULL);
+		if (r) {
+			ti->error = "Unable to read metadata";
+			goto bad;
+		}
+	}
+
+	r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+	if (r) {
+		ti->error = "Hardware memory error when reading superblock";
+		goto bad;
+	}
+	if (!le32_to_cpu(le32_to_cpu(s.magic)) && !le32_to_cpu(s.version)) {
+		r = init_memory(wc);
+		if (r) {
+			ti->error = "Unable to initialize device";
+			goto bad;
+		}
+		r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+		if (r) {
+			ti->error = "Hardware memory error when reading superblock";
+			goto bad;
+		}
+	}
+
+	if (le32_to_cpu(s.magic) != MEMORY_SUPERBLOCK_MAGIC) {
+		ti->error = "Invalid magic in the superblock";
+		r = -EINVAL;
+		goto bad;
+	}
+
+	if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION) {
+		ti->error = "Invalid version in the superblock";
+		r = -EINVAL;
+		goto bad;
+	}
+
+	if (le32_to_cpu(s.block_size) != wc->block_size) {
+		ti->error = "Block size does not match superblock";
+		r = -EINVAL;
+		goto bad;
+	}
+
+	wc->n_blocks = le64_to_cpu(s.n_blocks);
+
+	offset = wc->n_blocks * sizeof(struct wc_memory_entry);
+	if (offset / sizeof(struct wc_memory_entry) != le64_to_cpu(sb(wc)->n_blocks)) {
+overflow:
+		ti->error = "Overflow in size calculation";
+		r = -EINVAL;
+		goto bad;
+	}
+	offset += sizeof(struct wc_memory_superblock);
+	if (offset < sizeof(struct wc_memory_superblock))
+		goto overflow;
+	offset = (offset + wc->block_size - 1) & ~(size_t)(wc->block_size - 1);
+	data_size = wc->n_blocks * (size_t)wc->block_size;
+	if (!offset || (data_size / wc->block_size != wc->n_blocks) ||
+	    (offset + data_size < offset))
+		goto overflow;
+	if (offset + data_size > wc->memory_map_size) {
+		ti->error = "Memory area is too small";
+		r = -EINVAL;
+		goto bad;
+	}
+
+	wc->metadata_sectors = offset >> SECTOR_SHIFT;
+	wc->block_start = (char *)sb(wc) + offset;
+
+	x = (uint64_t)wc->n_blocks * (100 - high_wm_percent);
+	x += 50;
+	do_div(x, 100);
+	wc->freelist_high_watermark = x;
+	x = (uint64_t)wc->n_blocks * (100 - low_wm_percent);
+	x += 50;
+	do_div(x, 100);
+	wc->freelist_low_watermark = x;
+
+	r = writecache_alloc_entries(wc);
+	if (r) {
+		ti->error = "Cannot allocate memory";
+		goto bad;
+	}
+
+	ti->num_flush_bios = 1;
+	ti->flush_supported = true;
+	ti->num_discard_bios = 1;
+
+	return 0;
+
+bad_arguments:
+	r = -EINVAL;
+	ti->error = "Bad arguments";
+bad:
+	writecache_dtr(ti);
+	return r;
+}
+
+static void writecache_status(struct dm_target *ti, status_type_t type,
+			      unsigned status_flags, char *result, unsigned maxlen)
+{
+	struct dm_writecache *wc = ti->private;
+	unsigned extra_args;
+	unsigned sz = 0;
+	uint64_t x;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc), (unsigned long long)wc->n_blocks,
+		       (unsigned long long)wc->freelist_size, (unsigned long long)wc->writeback_size);
+		break;
+	case STATUSTYPE_TABLE:
+		if (WC_MODE_PMEM(wc))
+			DMEMIT("p %s %s %u ", wc->dev->name, wc->memory_name, wc->block_size);
+		else
+			DMEMIT("s %s %s %u ", wc->dev->name, wc->ssd_dev->name, wc->block_size);
+		extra_args = 0;
+		if (wc->high_wm_percent_set)
+			extra_args += 2;
+		if (wc->low_wm_percent_set)
+			extra_args += 2;
+		if (wc->max_writeback_jobs_set)
+			extra_args += 2;
+		if (wc->autocommit_blocks_set)
+			extra_args += 2;
+		if (wc->autocommit_time_set)
+			extra_args += 2;
+		DMEMIT("%u", extra_args);
+		if (wc->high_wm_percent_set) {
+			x = (uint64_t)wc->freelist_high_watermark * 100;
+			x += wc->n_blocks / 2;
+			do_div(x, (size_t)wc->n_blocks);
+			DMEMIT(" high_watermark %u%%", 100 - (unsigned)x);
+		}
+		if (wc->low_wm_percent_set) {
+			x = (uint64_t)wc->freelist_low_watermark * 100;
+			x += wc->n_blocks / 2;
+			do_div(x, (size_t)wc->n_blocks);
+			DMEMIT(" low_watermark %u%%", 100 - (unsigned)x);
+		}
+		if (wc->max_writeback_jobs_set) {
+			DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs);
+		}
+		if (wc->autocommit_blocks_set) {
+			DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks);
+		}
+		if (wc->autocommit_time_set) {
+			DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies));
+		}
+		break;
+	}
+}
+
+static struct target_type writecache_target = {
+	.name			= "writecache",
+	.version		= {1, 0, 0},
+	.module			= THIS_MODULE,
+	.ctr			= writecache_ctr,
+	.dtr			= writecache_dtr,
+	.status			= writecache_status,
+	.postsuspend		= writecache_suspend,
+	.resume			= writecache_resume,
+	.message		= writecache_message,
+	.map			= writecache_map,
+	.end_io			= writecache_end_io,
+	.iterate_devices	= writecache_iterate_devices,
+	.io_hints		= writecache_io_hints,
+};
+
+static int __init dm_writecache_init(void)
+{
+	int r;
+
+	r = dm_register_target(&writecache_target);
+	if (r < 0) {
+		DMERR("register failed %d", r);
+		return r;
+	}
+
+	return 0;
+}
+
+static void __exit dm_writecache_exit(void)
+{
+	dm_unregister_target(&writecache_target);
+}
+
+module_init(dm_writecache_init);
+module_exit(dm_writecache_exit);
+
+MODULE_DESCRIPTION(DM_NAME " writecache target");
+MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
+MODULE_LICENSE("GPL");

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 0/4] dm-writecache
  2017-11-07 20:59 [PATCH 0/4] dm-writecache Mikulas Patocka
                   ` (3 preceding siblings ...)
  2017-11-07 21:05 ` [PATCH 4/4] dm-writecache Mikulas Patocka
@ 2017-11-07 21:20 ` Mike Snitzer
  4 siblings, 0 replies; 6+ messages in thread
From: Mike Snitzer @ 2017-11-07 21:20 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Alasdair G. Kergon

On Tue, Nov 07 2017 at  3:59pm -0500,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> Hi
> 
> Here I'm sending patches for the dm-writecache target. It is for kernel 
> 4.14-rc8.

Hi,

Patches 1 - 3 look pretty straight-forward, shouldn't be hard to get
them in sooner rather than later.

So in addition to those you already sent this patchset to: you'd do well
to send patches 1 - 3 to the appropriate maintainers (and cc their
corresponding subsystem mailing list, like linux-mm, etc, otherwise
LKML).

Feel free to re-send the series with patches 1 - 3 addressed
accordingly.. no worries about duplicate posts to dm-devel.

Thanks,
Mike

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-11-07 21:20 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-11-07 20:59 [PATCH 0/4] dm-writecache Mikulas Patocka
2017-11-07 21:00 ` [PATCH 1/4] bio: have bio_kmap_irq return the size of mapped data Mikulas Patocka
2017-11-07 21:04 ` [PATCH 2/4] vmalloc: introduce vmap_pfn Mikulas Patocka
2017-11-07 21:04 ` [PATCH 3/4] swait: export the symbols __prepare_to_swait and __finish_swait Mikulas Patocka
2017-11-07 21:05 ` [PATCH 4/4] dm-writecache Mikulas Patocka
2017-11-07 21:20 ` [PATCH 0/4] dm-writecache Mike Snitzer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.