From: Nitin Gupta <ngupta@vflare.org>
To: Pekka Enberg <penberg@cs.helsinki.fi>,
	Hugh Dickins <hugh.dickins@tiscali.co.uk>,
	Andrew Morton <akpm@linux-foundation.org>,
	Greg KH <greg@kroah.com>,
	Dan Magenheimer <dan.magenheimer@oracle.com>,
	Rik van Riel <riel@redhat.com>, Avi Kivity <avi@redhat.com>,
	Christoph Hellwig <hch@infradead.org>,
	Minchan Kim <minchan.kim@gmail.com>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: linux-mm <linux-mm@kvack.org>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: [PATCH 7/8] Use xvmalloc to store compressed chunks
Date: Fri, 16 Jul 2010 18:07:49 +0530
Message-ID: <1279283870-18549-8-git-send-email-ngupta@vflare.org>
In-Reply-To: <1279283870-18549-1-git-send-email-ngupta@vflare.org>

xvmalloc is an O(1) memory allocator designed specifically
for storing variable-sized compressed chunks. It is already
used by the zram driver for the same purpose.
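
As a quick reference, below is a minimal sketch of the xvmalloc
calling pattern this patch relies on. The function names and
signatures match those used in the diff; zcache_xv_demo() itself
is a hypothetical helper, not part of the patch, and the
local_irq_save()/restore() pairs the driver wraps around
xv_malloc()/xv_free() are omitted for brevity:

	/* Sketch only: create a pool, store one object, tear down */
	static int zcache_xv_demo(u32 size)
	{
		struct xv_pool *pool = xv_create_pool();
		struct page *page;
		void *obj;
		u32 offset;
		int ret;

		if (!pool)
			return -ENOMEM;

		/* xvmalloc hands back a <page, offset> pair, not a pointer */
		ret = xv_malloc(pool, size, &page, &offset, GFP_NOWAIT);
		if (!ret) {
			/* the object must be mapped before it is touched */
			obj = kmap_atomic(page, KM_USER0) + offset;
			memset(obj, 0, size);
			kunmap_atomic(obj, KM_USER0);

			xv_free(pool, page, offset);
		}

		xv_destroy_pool(pool);
		return ret;
	}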

A new statistic is also exported:
/sys/kernel/mm/zcache/pool<id>/mem_used_total

This gives the pool's total memory usage, including allocator
fragmentation and metadata overhead.
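
For example, assuming a pool with id 0 (both the pool id and the
value shown are purely illustrative):

	# cat /sys/kernel/mm/zcache/pool0/mem_used_total
	327680

The figure is reported in bytes, as returned by
xv_get_total_size_bytes() in the hunk below.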

Currently, we use just one xvmalloc pool per zcache pool.
If this proves to be a performance bottleneck, xvmalloc
pools will also be created per-CPU.

xvmalloc details, performance numbers, and a comparison
with kmalloc (SLUB):

http://code.google.com/p/compcache/wiki/xvMalloc
http://code.google.com/p/compcache/wiki/xvMallocPerformance
http://code.google.com/p/compcache/wiki/AllocatorsComparison

Signed-off-by: Nitin Gupta <ngupta@vflare.org>
---
 drivers/staging/zram/zcache_drv.c |  150 +++++++++++++++++++++++++++++-------
 drivers/staging/zram/zcache_drv.h |    6 ++
 2 files changed, 127 insertions(+), 29 deletions(-)

diff --git a/drivers/staging/zram/zcache_drv.c b/drivers/staging/zram/zcache_drv.c
index 2a02606..71ca48a 100644
--- a/drivers/staging/zram/zcache_drv.c
+++ b/drivers/staging/zram/zcache_drv.c
@@ -47,6 +47,7 @@
 #include <linux/slab.h>
 #include <linux/u64_stats_sync.h>
 
+#include "xvmalloc.h"
 #include "zcache_drv.h"
 
 static DEFINE_PER_CPU(unsigned char *, compress_buffer);
@@ -179,6 +180,7 @@ static void zcache_destroy_pool(struct zcache_pool *zpool)
 	}
 
 	free_percpu(zpool->stats);
+	xv_destroy_pool(zpool->xv_pool);
 	kfree(zpool);
 }
 
@@ -219,6 +221,12 @@ int zcache_create_pool(void)
 		goto out;
 	}
 
+	zpool->xv_pool = xv_create_pool();
+	if (!zpool->xv_pool) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	rwlock_init(&zpool->tree_lock);
 	seqlock_init(&zpool->memlimit_lock);
 	zpool->inode_tree = RB_ROOT;
@@ -446,35 +454,81 @@ static void *zcache_index_to_ptr(unsigned long index)
 }
 
 /*
+ * Encode a <page, offset> pair as a single "pointer" value which is
+ * stored in the corresponding radix tree node.
+ */
+static void *zcache_xv_location_to_ptr(struct page *page, u32 offset)
+{
+	unsigned long ptrval;
+
+	ptrval = page_to_pfn(page) << PAGE_SHIFT;
+	ptrval |= (offset & ~PAGE_MASK);
+
+	return (void *)ptrval;
+}
+
+/*
+ * Decode the <page, offset> pair from the "pointer" value returned
+ * by a radix tree lookup.
+ */
+static void zcache_ptr_to_xv_location(void *ptr, struct page **page,
+				u32 *offset)
+{
+	unsigned long ptrval = (unsigned long)ptr;
+
+	*page = pfn_to_page(ptrval >> PAGE_SHIFT);
+	*offset = ptrval & ~PAGE_MASK;
+}
+
+/*
  * Radix node contains "pointer" value which encode <page, offset>
  * pair, locating the compressed object. Header of the object then
  * contains corresponding 'index' value.
  */
-static unsigned long zcache_ptr_to_index(struct page *page)
+static unsigned long zcache_ptr_to_index(void *ptr)
 {
+	u32 offset;
+	struct page *page;
 	unsigned long index;
+	struct zcache_objheader *zheader;
 
-	if (zcache_is_zero_page(page))
-		index = (unsigned long)(page) >> ZCACHE_ZERO_PAGE_INDEX_SHIFT;
-	else
-		index = page->index;
+	if (zcache_is_zero_page(ptr))
+		return (unsigned long)(ptr) >> ZCACHE_ZERO_PAGE_INDEX_SHIFT;
+
+	zcache_ptr_to_xv_location(ptr, &page, &offset);
+
+	zheader = kmap_atomic(page, KM_USER0) + offset;
+	index = zheader->index;
+	kunmap_atomic(zheader, KM_USER0);
 
 	return index;
 }
 
-void zcache_free_page(struct zcache_pool *zpool, struct page *page)
+void zcache_free_page(struct zcache_pool *zpool, void *ptr)
 {
 	int is_zero;
+	unsigned long flags;
 
-	if (unlikely(!page))
+	if (unlikely(!ptr))
 		return;
 
-	is_zero = zcache_is_zero_page(page);
+	is_zero = zcache_is_zero_page(ptr);
 	if (!is_zero) {
-		int clen = page->private;
+		int clen;
+		void *obj;
+		u32 offset;
+		struct page *page;
+
+		zcache_ptr_to_xv_location(ptr, &page, &offset);
+		obj = kmap_atomic(page, KM_USER0) + offset;
+		clen = xv_get_object_size(obj) -
+				sizeof(struct zcache_objheader);
+		kunmap_atomic(obj, KM_USER0);
 
 		zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE, -clen);
-		__free_page(page);
+		local_irq_save(flags);
+		xv_free(zpool->xv_pool, page, offset);
+		local_irq_restore(flags);
 	}
 
 	zcache_dec_pages(zpool, is_zero);
@@ -491,24 +545,23 @@ static int zcache_store_page(struct zcache_inode_rb *znode,
 			pgoff_t index, struct page *page, int is_zero)
 {
 	int ret;
+	void *nodeptr;
 	size_t clen;
 	unsigned long flags;
+
+	u32 zoffset;
 	struct page *zpage;
 	unsigned char *zbuffer, *zworkmem;
 	unsigned char *src_data, *dest_data;
+
+	struct zcache_objheader *zheader;
 	struct zcache_pool *zpool = znode->pool;
 
 	if (is_zero) {
-		zpage = zcache_index_to_ptr(index);
+		nodeptr = zcache_index_to_ptr(index);
 		goto out_store;
 	}
 
-	zpage = alloc_page(GFP_NOWAIT);
-	if (!zpage) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	preempt_disable();
 	zbuffer = __get_cpu_var(compress_buffer);
 	zworkmem = __get_cpu_var(compress_workmem);
@@ -528,17 +581,32 @@ static int zcache_store_page(struct zcache_inode_rb *znode,
 		goto out;
 	}
 
-	dest_data = kmap_atomic(zpage, KM_USER0);
+	local_irq_save(flags);
+	ret = xv_malloc(zpool->xv_pool, clen + sizeof(*zheader),
+			&zpage, &zoffset, GFP_NOWAIT);
+	local_irq_restore(flags);
+	if (unlikely(ret)) {
+		ret = -ENOMEM;
+		preempt_enable();
+		goto out;
+	}
+
+	dest_data = kmap_atomic(zpage, KM_USER0) + zoffset;
+
+	/* Store index value in header */
+	zheader = (struct zcache_objheader *)dest_data;
+	zheader->index = index;
+	dest_data += sizeof(*zheader);
+
 	memcpy(dest_data, zbuffer, clen);
 	kunmap_atomic(dest_data, KM_USER0);
 	preempt_enable();
 
-	zpage->index = index;
-	zpage->private = clen;
+	nodeptr = zcache_xv_location_to_ptr(zpage, zoffset);
 
 out_store:
 	spin_lock_irqsave(&znode->tree_lock, flags);
-	ret = radix_tree_insert(&znode->page_tree, index, zpage);
+	ret = radix_tree_insert(&znode->page_tree, index, nodeptr);
 	if (unlikely(ret)) {
 		spin_unlock_irqrestore(&znode->tree_lock, flags);
 		if (!is_zero)
@@ -752,6 +820,19 @@ static ssize_t compr_data_size_show(struct kobject *kobj,
 }
 ZCACHE_POOL_ATTR_RO(compr_data_size);
 
+/*
+ * Total memory used by this pool, including allocator fragmentation
+ * and metadata overhead.
+ */
+static ssize_t mem_used_total_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
+
+	return sprintf(buf, "%llu\n", xv_get_total_size_bytes(zpool->xv_pool));
+}
+ZCACHE_POOL_ATTR_RO(mem_used_total);
+
 static void memlimit_sysfs_common(struct kobject *kobj, u64 *value, int store)
 {
 	struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
@@ -795,6 +876,7 @@ static struct attribute *zcache_pool_attrs[] = {
 	&zero_pages_attr.attr,
 	&orig_data_size_attr.attr,
 	&compr_data_size_attr.attr,
+	&mem_used_total_attr.attr,
 	&memlimit_attr.attr,
 	NULL,
 };
@@ -904,13 +986,17 @@ static int zcache_init_shared_fs(char *uuid, size_t pagesize)
 static int zcache_get_page(int pool_id, ino_t inode_no,
 			pgoff_t index, struct page *page)
 {
-	int ret = -1;
+	int ret;
+	void *nodeptr;
 	size_t clen;
 	unsigned long flags;
+
+	u32 offset;
 	struct page *src_page;
 	unsigned char *src_data, *dest_data;
 
 	struct zcache_inode_rb *znode;
+	struct zcache_objheader *zheader;
 	struct zcache_pool *zpool = zcache->pools[pool_id];
 
 	znode = zcache_find_inode(zpool, inode_no);
@@ -922,29 +1008,35 @@ static int zcache_get_page(int pool_id, ino_t inode_no,
 	BUG_ON(znode->inode_no != inode_no);
 
 	spin_lock_irqsave(&znode->tree_lock, flags);
-	src_page = radix_tree_delete(&znode->page_tree, index);
+	nodeptr = radix_tree_delete(&znode->page_tree, index);
 	if (zcache_inode_is_empty(znode))
 		zcache_inode_isolate(znode);
 	spin_unlock_irqrestore(&znode->tree_lock, flags);
 
 	kref_put(&znode->refcount, zcache_inode_release);
 
-	if (!src_page) {
+	if (!nodeptr) {
 		ret = -EFAULT;
 		goto out;
 	}
 
-	if (zcache_is_zero_page(src_page)) {
+	if (zcache_is_zero_page(nodeptr)) {
 		zcache_handle_zero_page(page);
 		goto out_free;
 	}
 
 	clen = PAGE_SIZE;
-	src_data = kmap_atomic(src_page, KM_USER0);
+	zcache_ptr_to_xv_location(nodeptr, &src_page, &offset);
+
+	src_data = kmap_atomic(src_page, KM_USER0) + offset;
+	zheader = (struct zcache_objheader *)src_data;
+	BUG_ON(zheader->index != index);
+
 	dest_data = kmap_atomic(page, KM_USER1);
 
-	ret = lzo1x_decompress_safe(src_data, src_page->private,
-				dest_data, &clen);
+	ret = lzo1x_decompress_safe(src_data + sizeof(*zheader),
+			xv_get_object_size(src_data) - sizeof(*zheader),
+			dest_data, &clen);
 
 	kunmap_atomic(src_data, KM_USER0);
 	kunmap_atomic(dest_data, KM_USER1);
@@ -956,7 +1048,7 @@ static int zcache_get_page(int pool_id, ino_t inode_no,
 	flush_dcache_page(page);
 
 out_free:
-	zcache_free_page(zpool, src_page);
+	zcache_free_page(zpool, nodeptr);
 	ret = 0; /* success */
 
 out:
diff --git a/drivers/staging/zram/zcache_drv.h b/drivers/staging/zram/zcache_drv.h
index 9ce97da..7283116 100644
--- a/drivers/staging/zram/zcache_drv.h
+++ b/drivers/staging/zram/zcache_drv.h
@@ -41,6 +41,11 @@ static const unsigned zcache_pool_default_memlimit_perc_ram = 10;
  /* We only keep pages that compress to less than this size */
 static const int zcache_max_page_size = PAGE_SIZE / 2;
 
+/* Stored at the beginning of each compressed object */
+struct zcache_objheader {
+	unsigned long index;
+};
+
 /* Red-Black tree node. Maps inode to its page-tree */
 struct zcache_inode_rb {
 	struct radix_tree_root page_tree; /* maps inode index to page */
@@ -64,6 +69,7 @@ struct zcache_pool {
 	seqlock_t memlimit_lock;	/* protects memlimit */
 	u64 memlimit;			/* bytes */
 
+	struct xv_pool *xv_pool;	/* xvmalloc pool */
 	struct zcache_pool_stats_cpu *stats;	/* percpu stats */
 #ifdef CONFIG_SYSFS
 	unsigned char name[MAX_ZPOOL_NAME_LEN];
-- 
1.7.1.1


