All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFCv2 04/10] dm-dedup: implementation of the read-on-write procedure
@ 2014-08-28 22:05 Vasily Tarasov
  0 siblings, 0 replies; only message in thread
From: Vasily Tarasov @ 2014-08-28 22:05 UTC (permalink / raw)
  To: dm-devel
  Cc: Joe Thornber, Mike Snitzer, Christoph Hellwig, Philip Shilane,
	Sonam Mandal, Erez Zadok

If a request from the upper layer is smaller than the block size, then
we have to perform a read-on-write to properly compute the hash value.

Signed-off-by: Vasily Tarasov <tarasov@vasily.name>
---
 drivers/md/dm-dedup-rw.c |  248 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/md/dm-dedup-rw.h |   19 ++++
 2 files changed, 267 insertions(+), 0 deletions(-)
 create mode 100644 drivers/md/dm-dedup-rw.c
 create mode 100644 drivers/md/dm-dedup-rw.h

diff --git a/drivers/md/dm-dedup-rw.c b/drivers/md/dm-dedup-rw.c
new file mode 100644
index 0000000..383ec39
--- /dev/null
+++ b/drivers/md/dm-dedup-rw.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2012-2014 Vasily Tarasov
+ * Copyright (C) 2012-2014 Geoff Kuenning
+ * Copyright (C) 2012-2014 Sonam Mandal
+ * Copyright (C) 2012-2014 Karthikeyani Palanisami
+ * Copyright (C) 2012-2014 Philip Shilane
+ * Copyright (C) 2012-2014 Sagar Trehan
+ * Copyright (C) 2012-2014 Erez Zadok
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-dedup-target.h"
+#include "dm-dedup-rw.h"
+#include "dm-dedup-kvstore.h"
+
+#define DMD_IO_SIZE	4096
+
+/* Round the bio's start sector down to a multiple of sectors_per_block. */
+static uint64_t compute_sector(struct bio *bio,
+			       struct dedup_config *dc)
+{
+	uint64_t sector = bio->bi_iter.bi_sector;
+	uint64_t nr_blocks = sector / dc->sectors_per_block;
+	uint64_t aligned = nr_blocks * dc->sectors_per_block;
+
+	/* First sector of the block that contains the bio's start. */
+	return aligned;
+}
+
+/* Read the block starting at sector @pbn of the data device into @pl. */
+static int fetch_whole_block(struct dedup_config *dc,
+			uint64_t pbn, struct page_list *pl)
+{
+	unsigned long io_errors;
+	struct dm_io_request req;
+	struct dm_io_region region = {
+		.bdev = dc->data_dev->bdev,
+		.sector = pbn,
+		.count = dc->sectors_per_block,
+	};
+
+	req.client = dc->io_client;
+	req.bi_rw = READ;
+	req.notify.fn = NULL;
+	req.mem.type = DM_IO_PAGE_LIST;
+	req.mem.offset = 0;
+	req.mem.ptr.pl = pl;
+	return dm_io(&req, 1, &region, &io_errors);
+}
+
+/*
+ * Copy the payload of the first bio_vec of @bio into @page at the byte
+ * offset that the bio's start sector has inside its block.
+ * Returns 0, or -EINVAL if @page or the bio's first page is NULL.
+ *
+ * NOTE(review): only bi_io_vec[0] is merged and the copy length is not
+ * checked against the block size; confirm callers guarantee both.
+ */
+static int merge_data(struct dedup_config *dc, struct page *page,
+				struct bio *bio)
+{
+	struct bio_vec *first = bio->bi_io_vec;
+	void *from, *to;
+	int sector_in_block;
+
+	if (!page || !first->bv_page)
+		return -EINVAL;
+
+	/* Sectors between the start of the block and the start of the bio */
+	sector_in_block = bio->bi_iter.bi_sector % dc->sectors_per_block;
+
+	from = page_address(first->bv_page) + first->bv_offset;
+	to = page_address(page) + to_bytes(sector_in_block);
+	memmove(to, from, first->bv_len);
+	return 0;
+}
+
+/* Copy DMD_IO_SIZE bytes from @src into the first page of @clone. */
+static void copy_pages(struct page *src, struct bio *clone)
+{
+	struct page *dst = clone->bi_io_vec->bv_page;
+	void *from = page_address(src);
+	void *to = page_address(dst);
+
+	memmove(to, from, DMD_IO_SIZE);
+}
+
+/*
+ * Completion handler for bios built by create_bio(): frees the clone's
+ * page, completes the original bio kept in bi_private with the clone's
+ * status, and drops the clone.
+ */
+static void my_endio(struct bio *clone, int error)
+{
+	struct bio *orig = clone->bi_private;
+	/* create_bio()'s page sits in slot 0; do not go through bi_iter */
+	struct page *page = clone->bi_io_vec[0].bv_page;
+
+	if (!error && !bio_flagged(clone, BIO_UPTODATE))
+		error = -EIO;
+
+	if (page) {
+		clone->bi_io_vec[0].bv_page = NULL;
+		free_pages((unsigned long)page_address(page), 0);
+	}
+
+	/* Pass the real status on; a constant 0 would hide failed I/O. */
+	bio_endio(orig, error);
+	bio_put(clone);
+}
+
+/*
+ * Zero bio_cur_bytes(@bio) bytes starting at bio_data(@bio).
+ *
+ * XXX: there is an existing zero_fill_bio() in the kernel,
+ * should we use it?
+ */
+static void my_zero_fill_bio(struct bio *bio)
+{
+	unsigned int nbytes = bio_cur_bytes(bio);
+	void *buf = bio_data(bio);
+
+	memset(buf, 0, nbytes);
+}
+
+/*
+ * Build a one-page bio aimed at the start of the block containing @bio.
+ * It inherits @bio's device and flags and completes @bio via my_endio().
+ * Returns NULL if the bio or its page cannot be set up.
+ */
+static struct bio *create_bio(struct dedup_config *dc,
+			      struct bio *bio)
+{
+	struct page *data_page;
+	struct bio *new_bio = bio_kmalloc(GFP_NOIO, 1);
+
+	if (!new_bio)
+		return NULL;
+
+	new_bio->bi_end_io = my_endio;
+	new_bio->bi_private = bio;	/* completed from my_endio() */
+	new_bio->bi_iter.bi_sector = compute_sector(bio, dc);
+	new_bio->bi_rw = bio->bi_rw;
+	new_bio->bi_bdev = bio->bi_bdev;
+
+	data_page = alloc_pages(GFP_NOIO, 0);
+	if (!data_page) {
+		bio_put(new_bio);
+		return NULL;
+	}
+
+	if (bio_add_page(new_bio, data_page, DMD_IO_SIZE, 0))
+		return new_bio;
+
+	free_pages((unsigned long) page_address(data_page), 0);
+	bio_put(new_bio);
+	return NULL;
+}
+
+/*
+ * Read-modify path for a block whose old data starts at sector @pbn of
+ * the data device: read the old block, overlay @bio's payload and return
+ * a new bio carrying the merged block. Returns NULL on any failure.
+ */
+static struct bio *prepare_bio_with_pbn(struct dedup_config *dc,
+					struct bio *bio, uint64_t pbn)
+{
+	struct page_list *pl;
+	struct bio *clone = NULL;
+
+	/* sizeof(*pl): room for the whole struct, not just for a pointer */
+	pl = kmalloc(sizeof(*pl), GFP_NOIO);
+	if (!pl)
+		goto out;
+
+	/* One page per list suffices while the target I/O size is 4KB. */
+	pl->page = alloc_pages(GFP_NOIO, 0);
+	if (!pl->page)
+		goto out_allocfail;
+
+	pl->next = NULL;
+
+	if (fetch_whole_block(dc, pbn, pl) < 0)
+		goto out_fail;
+
+	if (merge_data(dc, pl->page, bio) < 0)
+		goto out_fail;
+
+	clone = create_bio(dc, bio);
+	if (!clone)
+		goto out_fail;
+
+	copy_pages(pl->page, clone);
+
+	/* The scratch page is released on success as well as on failure. */
+out_fail:
+	free_pages((unsigned long) page_address(pl->page), 0);
+out_allocfail:
+	kfree(pl);
+out:
+	return clone;
+}
+
+/*
+ * Path for a block with no prior data: zero the data of a new bio and
+ * overlay @bio's payload on it. Returns NULL if create_bio() fails.
+ */
+static struct bio *prepare_bio_without_pbn(struct dedup_config *dc,
+					   struct bio *bio)
+{
+	struct bio *clone = create_bio(dc, bio);
+
+	if (!clone)
+		return NULL;
+
+	my_zero_fill_bio(clone);
+
+	if (merge_data(dc, clone->bi_io_vec->bv_page, bio) < 0)
+		BUG();
+	return clone;
+}
+
+/*
+ * Build a full-block bio for the partial-block write @bio. A kvs_lookup()
+ * result of 1 selects the read-and-merge path with the returned pbn,
+ * 0 selects the zero-fill path, anything else hits BUG().
+ */
+struct bio *prepare_bio_on_write(struct dedup_config *dc, struct bio *bio)
+{
+	struct lbn_pbn_value found;
+	struct bio *clone;
+	uint32_t found_len;
+	uint64_t lbn = compute_sector(bio, dc) / dc->sectors_per_block;
+	int ret;
+
+	ret = dc->kvs_lbn_pbn->kvs_lookup(dc->kvs_lbn_pbn, (void *)&lbn,
+			sizeof(lbn), (void *)&found, &found_len);
+	if (ret == 1)
+		clone = prepare_bio_with_pbn(dc, bio,
+				found.pbn * dc->sectors_per_block);
+	else if (ret == 0)
+		clone = prepare_bio_without_pbn(dc, bio);
+	else
+		BUG();
+
+	return clone;
+}
diff --git a/drivers/md/dm-dedup-rw.h b/drivers/md/dm-dedup-rw.h
new file mode 100644
index 0000000..ad12a27
--- /dev/null
+++ b/drivers/md/dm-dedup-rw.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2012-2014 Vasily Tarasov
+ * Copyright (C) 2012-2014 Geoff Kuenning
+ * Copyright (C) 2012-2014 Sonam Mandal
+ * Copyright (C) 2012-2014 Karthikeyani Palanisami
+ * Copyright (C) 2012-2014 Philip Shilane
+ * Copyright (C) 2012-2014 Sagar Trehan
+ * Copyright (C) 2012-2014 Erez Zadok
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_DEDUP_RW_H
+#define DM_DEDUP_RW_H
+
+extern struct bio *prepare_bio_on_write(struct dedup_config *dc,
+							struct bio *bio);
+
+#endif /* DM_DEDUP_RW_H */
-- 
1.7.1

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2014-08-28 22:05 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-08-28 22:05 [PATCH RFCv2 04/10] dm-dedup: implementation of the read-on-write procedure Vasily Tarasov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.