All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20111222155151.GA21469@ca-server1.us.oracle.com>

diff --git a/a/1.txt b/N1/1.txt
index 8b13789..2b52708 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -1 +1,2666 @@
+>From d23d411ef33f094c14855d48962d44aec854c500 Mon Sep 17 00:00:00 2001
+From: Dan Magenheimer <dan.magenheimer@oracle.com>
+Date: Wed, 21 Dec 2011 14:01:59 -0700
+Subject: [PATCH v2 4/6] drivers/staging/ramster: ramster-specific changes to zcache/tmem
 
+In tmem.[ch], new "repatriate" (provoke async get) and "localify" (handle
+incoming data resulting from an async get) routines combine with a handful
+of changes to existing pamops interfaces to allow the generic tmem code
+to support asynchronous operations.  Also, a new tmem_xhandle struct
+groups together key information that must be passed to remote tmem stores.
+
+Zcache-main.c is augmented with a large amount of ramster-specific code
+to handle remote operations and "foreign" pages on both ends of the
+"remotify" protocol.  New "foreign" pools are auto-created on demand.
+A "selfshrinker" thread periodically repatriates remote persistent pages
+when local memory conditions allow.  For certain operations, a queue is
+necessary to guarantee strict ordering as out-of-order puts/flushes can
+cause strange race conditions.  Pampd pointers now either point to local
+memory OR describe a remote page; to allow the same 64 bits to describe
+either, the LSB is used to differentiate.  Some acrobatics must be performed
+to ensure local memory is available to handle a remote persistent get,
+or deal with the data directly anyway if the malloc failed.  Lots
+of ramster-specific statistics are available via sysfs.
+
+Note: Some debug ifdefs left in for now.
+
+Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
+
+---
+
+ drivers/staging/ramster/Kconfig       |   17 +-
+ drivers/staging/ramster/Makefile      |    5 +-
+ drivers/staging/ramster/tmem.c        |  117 ++-
+ drivers/staging/ramster/tmem.h        |   46 +-
+ drivers/staging/ramster/zcache-main.c | 1651 +++++++++++++++++++++++++++++----
+ 5 files changed, 1636 insertions(+), 200 deletions(-)
+
+diff --git a/drivers/staging/ramster/Kconfig b/drivers/staging/ramster/Kconfig
+index 7fabcb2..5154693 100644
+--- a/drivers/staging/ramster/Kconfig
++++ b/drivers/staging/ramster/Kconfig
+@@ -1,13 +1,14 @@
+-config ZCACHE
+-	tristate "Dynamic compression of swap pages and clean pagecache pages"
+-	depends on CLEANCACHE || FRONTSWAP
++config RAMSTER
++	tristate "Cross-machine RAM capacity sharing, aka peer-to-peer tmem"
++	depends on (CLEANCACHE || FRONTSWAP) && CONFIGFS_FS && !OCFS2_FS && !ZCACHE && !PREEMPT_VOLUNTARY && !HIGHMEM
+ 	select XVMALLOC
+ 	select LZO_COMPRESS
+ 	select LZO_DECOMPRESS
+ 	default n
+ 	help
+-	  Zcache doubles RAM efficiency while providing a significant
+-	  performance boosts on many workloads.  Zcache uses lzo1x
+-	  compression and an in-kernel implementation of transcendent
+-	  memory to store clean page cache pages and swap in RAM,
+-	  providing a noticeable reduction in disk I/O.
++	  RAMster allows RAM on other machines in a cluster to be utilized
++	  dynamically and symmetrically instead of swapping to a local swap
++	  disk, thus improving performance on memory-constrained workloads
++	  while minimizing total RAM across the cluster.  RAMster, like
++	  zcache, compresses swap pages into local RAM, but then remotifies
++	  the compressed pages to another node in the RAMster cluster.
+diff --git a/drivers/staging/ramster/Makefile b/drivers/staging/ramster/Makefile
+index 60daa27..e6c4a2e 100644
+--- a/drivers/staging/ramster/Makefile
++++ b/drivers/staging/ramster/Makefile
+@@ -1,3 +1,2 @@
+-zcache-y	:=	zcache-main.o tmem.o
+-
+-obj-$(CONFIG_ZCACHE)	+=	zcache.o
++obj-$(CONFIG_RAMSTER)	+=	zcache-main.o tmem.o
++obj-$(CONFIG_RAMSTER)	+=	ramster_o2net.o cluster/
+diff --git a/drivers/staging/ramster/tmem.c b/drivers/staging/ramster/tmem.c
+index 1ca66ea..ed7d07b 100644
+--- a/drivers/staging/ramster/tmem.c
++++ b/drivers/staging/ramster/tmem.c
+@@ -27,6 +27,7 @@
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
+ #include <linux/atomic.h>
++#include <linux/delay.h>
+ 
+ #include "tmem.h"
+ 
+@@ -316,7 +317,7 @@ static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
+ }
+ 
+ static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
+-					void *new_pampd)
++					void *new_pampd, bool no_free)
+ {
+ 	struct tmem_objnode **slot;
+ 	void *ret = NULL;
+@@ -325,7 +326,9 @@ static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
+ 	if ((slot != NULL) && (*slot != NULL)) {
+ 		void *old_pampd = *(void **)slot;
+ 		*(void **)slot = new_pampd;
+-		(*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);
++		if (!no_free)
++			(*tmem_pamops.free)(old_pampd, obj->pool,
++						NULL, 0, false);
+ 		ret = new_pampd;
+ 	}
+ 	return ret;
+@@ -481,7 +484,7 @@ static void tmem_objnode_node_destroy(struct tmem_obj *obj,
+ 			if (ht == 1) {
+ 				obj->pampd_count--;
+ 				(*tmem_pamops.free)(objnode->slots[i],
+-						obj->pool, NULL, 0);
++						obj->pool, NULL, 0, true);
+ 				objnode->slots[i] = NULL;
+ 				continue;
+ 			}
+@@ -498,7 +501,8 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
+ 		return;
+ 	if (obj->objnode_tree_height == 0) {
+ 		obj->pampd_count--;
+-		(*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
++		(*tmem_pamops.free)(obj->objnode_tree_root,
++					obj->pool, NULL, 0, true);
+ 	} else {
+ 		tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
+ 					obj->objnode_tree_height);
+@@ -529,7 +533,7 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
+  * always flushes for simplicity.
+  */
+ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
+-		char *data, size_t size, bool raw, bool ephemeral)
++		char *data, size_t size, bool raw, int ephemeral)
+ {
+ 	struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
+ 	void *pampd = NULL, *pampd_del = NULL;
+@@ -545,7 +549,7 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
+ 			/* if found, is a dup put, flush the old one */
+ 			pampd_del = tmem_pampd_delete_from_obj(obj, index);
+ 			BUG_ON(pampd_del != pampd);
+-			(*tmem_pamops.free)(pampd, pool, oidp, index);
++			(*tmem_pamops.free)(pampd, pool, oidp, index, true);
+ 			if (obj->pampd_count == 0) {
+ 				objnew = obj;
+ 				objfound = NULL;
+@@ -576,7 +580,7 @@ delete_and_free:
+ 	(void)tmem_pampd_delete_from_obj(obj, index);
+ free:
+ 	if (pampd)
+-		(*tmem_pamops.free)(pampd, pool, NULL, 0);
++		(*tmem_pamops.free)(pampd, pool, NULL, 0, true);
+ 	if (objnew) {
+ 		tmem_obj_free(objnew, hb);
+ 		(*tmem_hostops.obj_free)(objnew, pool);
+@@ -586,6 +590,65 @@ out:
+ 	return ret;
+ }
+ 
++void *tmem_localify_get_pampd(struct tmem_pool *pool, struct tmem_oid *oidp,
++				uint32_t index, struct tmem_obj **ret_obj,
++				void **saved_hb)
++{
++	struct tmem_hashbucket *hb;
++	struct tmem_obj *obj = NULL;
++	void *pampd = NULL;
++
++	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
++	spin_lock(&hb->lock);
++	obj = tmem_obj_find(hb, oidp);
++	if (likely(obj != NULL))
++		pampd = tmem_pampd_lookup_in_obj(obj, index);
++	*ret_obj = obj;
++	*saved_hb = (void *)hb;
++	/* note, hashbucket remains locked */
++	return pampd;
++}
++
++void tmem_localify_finish(struct tmem_obj *obj, uint32_t index,
++			  void *pampd, void *saved_hb, bool delete)
++{
++	struct tmem_hashbucket *hb = (struct tmem_hashbucket *)saved_hb;
++
++	BUG_ON(!spin_is_locked(&hb->lock));
++	if (pampd != NULL) {
++		BUG_ON(obj == NULL);
++		(void)tmem_pampd_replace_in_obj(obj, index, pampd, 1);
++	} else if (delete) {
++		BUG_ON(obj == NULL);
++		(void)tmem_pampd_delete_from_obj(obj, index);
++	}
++	spin_unlock(&hb->lock);
++}
++
++static int tmem_repatriate(void **ppampd, struct tmem_hashbucket *hb,
++				struct tmem_pool *pool, struct tmem_oid *oidp,
++				uint32_t index, bool free, char *data)
++{
++	void *old_pampd = *ppampd, *new_pampd = NULL;
++	bool intransit = false;
++	int ret = 0;
++
++
++	if (!is_ephemeral(pool))
++		new_pampd = (*tmem_pamops.repatriate_preload)(
++				old_pampd, pool, oidp, index, &intransit);
++	if (intransit)
++		ret = -EAGAIN;
++	else if (new_pampd != NULL)
++		*ppampd = new_pampd;
++	/* must release the hb->lock else repatriate can't sleep */
++	spin_unlock(&hb->lock);
++	if (!intransit)
++		ret = (*tmem_pamops.repatriate)(old_pampd, new_pampd, pool,
++						oidp, index, free, data);
++	return ret;
++}
++
+ /*
+  * "Get" a page, e.g. if one can be found, copy the tmem page with the
+  * matching handle from PAM space to the kernel.  By tmem definition,
+@@ -607,14 +670,38 @@ int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
+ 	int ret = -1;
+ 	struct tmem_hashbucket *hb;
+ 	bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
+-	bool lock_held = false;
++	bool lock_held = 0;
++	void **ppampd;
+ 
++again:
+ 	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
+ 	spin_lock(&hb->lock);
+-	lock_held = true;
++	lock_held = 1;
+ 	obj = tmem_obj_find(hb, oidp);
+ 	if (obj == NULL)
+ 		goto out;
++	ppampd = __tmem_pampd_lookup_in_obj(obj, index);
++	if (ppampd == NULL)
++		goto out;
++	if (tmem_pamops.is_remote(*ppampd)) {
++		ret = tmem_repatriate(ppampd, hb, pool, oidp,
++					index, free, data);
++		lock_held = 0; /* note hb->lock has been unlocked */
++		if (ret == -EAGAIN) {
++			/* rare I think, but should cond_resched()??? */
++			usleep_range(10, 1000);
++			goto again;
++		} else if (ret != 0) {
++#if 1
++			if (ret != -ENOENT)
++				pr_err("UNTESTED case in tmem_get, ret=%d\n",
++						ret);
++#endif
++			ret = -1;
++			goto out;
++		}
++		goto out;
++	}
+ 	if (free)
+ 		pampd = tmem_pampd_delete_from_obj(obj, index);
+ 	else
+@@ -628,10 +715,6 @@ int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
+ 			obj = NULL;
+ 		}
+ 	}
+-	if (tmem_pamops.is_remote(pampd)) {
+-		lock_held = false;
+-		spin_unlock(&hb->lock);
+-	}
+ 	if (free)
+ 		ret = (*tmem_pamops.get_data_and_free)(
+ 				data, size, raw, pampd, pool, oidp, index);
+@@ -668,7 +751,7 @@ int tmem_flush_page(struct tmem_pool *pool,
+ 	pampd = tmem_pampd_delete_from_obj(obj, index);
+ 	if (pampd == NULL)
+ 		goto out;
+-	(*tmem_pamops.free)(pampd, pool, oidp, index);
++	(*tmem_pamops.free)(pampd, pool, oidp, index, true);
+ 	if (obj->pampd_count == 0) {
+ 		tmem_obj_free(obj, hb);
+ 		(*tmem_hostops.obj_free)(obj, pool);
+@@ -682,8 +765,8 @@ out:
+ 
+ /*
+  * If a page in tmem matches the handle, replace the page so that any
+- * subsequent "get" gets the new page.  Returns 0 if
+- * there was a page to replace, else returns -1.
++ * subsequent "get" gets the new page.  Returns the new page if
++ * there was a page to replace, else returns NULL.
+  */
+ int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
+ 			uint32_t index, void *new_pampd)
+@@ -697,7 +780,7 @@ int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
+ 	obj = tmem_obj_find(hb, oidp);
+ 	if (obj == NULL)
+ 		goto out;
+-	new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd);
++	new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd, 0);
+ 	ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
+ out:
+ 	spin_unlock(&hb->lock);
+diff --git a/drivers/staging/ramster/tmem.h b/drivers/staging/ramster/tmem.h
+index ed147c4..47f1918 100644
+--- a/drivers/staging/ramster/tmem.h
++++ b/drivers/staging/ramster/tmem.h
+@@ -9,7 +9,6 @@
+ #ifndef _TMEM_H_
+ #define _TMEM_H_
+ 
+-#include <linux/types.h>
+ #include <linux/highmem.h>
+ #include <linux/hash.h>
+ #include <linux/atomic.h>
+@@ -89,6 +88,31 @@ struct tmem_oid {
+ 	uint64_t oid[3];
+ };
+ 
++struct tmem_xhandle {
++	uint8_t client_id;
++	uint8_t xh_data_cksum;
++	uint16_t xh_data_size;
++	uint16_t pool_id;
++	struct tmem_oid oid;
++	uint32_t index;
++	void *extra;
++};
++
++static inline struct tmem_xhandle tmem_xhandle_fill(uint16_t client_id,
++					struct tmem_pool *pool,
++					struct tmem_oid *oidp,
++					uint32_t index)
++{
++	struct tmem_xhandle xh;
++	xh.client_id = client_id;
++	xh.xh_data_cksum = (uint8_t)-1;
++	xh.xh_data_size = (uint16_t)-1;
++	xh.pool_id = pool->pool_id;
++	xh.oid = *oidp;
++	xh.index = index;
++	return xh;
++}
++
+ static inline void tmem_oid_set_invalid(struct tmem_oid *oidp)
+ {
+ 	oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
+@@ -147,7 +171,11 @@ struct tmem_obj {
+ 	unsigned int objnode_tree_height;
+ 	unsigned long objnode_count;
+ 	long pampd_count;
+-	void *extra; /* for private use by pampd implementation */
++	/* for current design of ramster, all pages belonging to
++	 * an object reside on the same remotenode and extra is
++	 * used to record the number of the remotenode so a
++	 * flush-object operation can specify it */
++	void *extra; /* for use by pampd implementation */
+ 	DECL_SENTINEL
+ };
+ 
+@@ -174,9 +202,14 @@ struct tmem_pamops {
+ 	int (*get_data_and_free)(char *, size_t *, bool, void *,
+ 				struct tmem_pool *, struct tmem_oid *,
+ 				uint32_t);
+-	void (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t);
++	void (*free)(void *, struct tmem_pool *,
++				struct tmem_oid *, uint32_t, bool);
+ 	void (*free_obj)(struct tmem_pool *, struct tmem_obj *);
+ 	bool (*is_remote)(void *);
++	void *(*repatriate_preload)(void *, struct tmem_pool *,
++					struct tmem_oid *, uint32_t, bool *);
++	int (*repatriate)(void *, void *, struct tmem_pool *,
++				struct tmem_oid *, uint32_t, bool, void *);
+ 	void (*new_obj)(struct tmem_obj *);
+ 	int (*replace_in_obj)(void *, struct tmem_obj *);
+ };
+@@ -193,11 +226,16 @@ extern void tmem_register_hostops(struct tmem_hostops *m);
+ 
+ /* core tmem accessor functions */
+ extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
+-			char *, size_t, bool, bool);
++			char *, size_t, bool, int);
+ extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
+ 			char *, size_t *, bool, int);
+ extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
+ 			void *);
++extern void *tmem_localify_get_pampd(struct tmem_pool *, struct tmem_oid *,
++				   uint32_t index, struct tmem_obj **,
++				   void **);
++extern void tmem_localify_finish(struct tmem_obj *, uint32_t index,
++				 void *, void *, bool);
+ extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
+ 			uint32_t index);
+ extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
+diff --git a/drivers/staging/ramster/zcache-main.c b/drivers/staging/ramster/zcache-main.c
+index cd0ed84..92fc7b2 100644
+--- a/drivers/staging/ramster/zcache-main.c
++++ b/drivers/staging/ramster/zcache-main.c
+@@ -1,7 +1,7 @@
+ /*
+  * zcache.c
+  *
+- * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp.
++ * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.
+  * Copyright (c) 2010,2011, Nitin Gupta
+  *
+  * Zcache provides an in-kernel "host implementation" for transcendent memory
+@@ -17,9 +17,11 @@
+  *
+  * [1] For a definition of page-accessible memory (aka PAM), see:
+  *   http://marc.info/?l=linux-mm&m=127811271605009
++ *  RAMSTER TODO:
++ *   - handle remotifying of buddied pages (see zbud_remotify_zbpg)
++ *   - kernel boot params: nocleancache/nofrontswap don't always work?!?
+  */
+ 
+-#include <linux/module.h>
+ #include <linux/cpu.h>
+ #include <linux/highmem.h>
+ #include <linux/list.h>
+@@ -30,11 +32,13 @@
+ #include <linux/atomic.h>
+ #include <linux/math64.h>
+ #include "tmem.h"
++#include "zcache.h"
++#include "ramster.h"
+ 
+ #include "../zram/xvmalloc.h" /* if built in drivers/staging */
+ 
+ #if (!defined(CONFIG_CLEANCACHE) && !defined(CONFIG_FRONTSWAP))
+-#error "zcache is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP"
++#error "ramster is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP"
+ #endif
+ #ifdef CONFIG_CLEANCACHE
+ #include <linux/cleancache.h>
+@@ -43,6 +47,61 @@
+ #include <linux/frontswap.h>
+ #endif
+ 
++enum ramster_remotify_op {
++	RAMSTER_REMOTIFY_EPH_PUT,
++	RAMSTER_REMOTIFY_PERS_PUT,
++	RAMSTER_REMOTIFY_FLUSH_PAGE,
++	RAMSTER_REMOTIFY_FLUSH_OBJ,
++	RAMSTER_INTRANSIT_PERS
++};
++
++struct ramster_remotify_hdr {
++	enum ramster_remotify_op op;
++	struct list_head list;
++};
++
++#define ZBH_SENTINEL  0x43214321
++#define ZBPG_SENTINEL  0xdeadbeef
++
++#define ZBUD_MAX_BUDS 2
++
++struct zbud_hdr {
++	struct ramster_remotify_hdr rem_op;
++	uint16_t client_id;
++	uint16_t pool_id;
++	struct tmem_oid oid;
++	uint32_t index;
++	uint16_t size; /* compressed size in bytes, zero means unused */
++	DECL_SENTINEL
++};
++
++#define ZVH_SENTINEL  0x43214321
++static const int zv_max_page_size = (PAGE_SIZE / 8) * 7;
++
++struct zv_hdr {
++	struct ramster_remotify_hdr rem_op;
++	uint16_t client_id;
++	uint16_t pool_id;
++	struct tmem_oid oid;
++	uint32_t index;
++	DECL_SENTINEL
++};
++
++struct flushlist_node {
++	struct ramster_remotify_hdr rem_op;
++	struct tmem_xhandle xh;
++};
++
++union {
++	struct ramster_remotify_hdr rem_op;
++	struct zv_hdr zv;
++	struct zbud_hdr zbud;
++	struct flushlist_node flist;
++} remotify_list_node;
++
++static LIST_HEAD(zcache_rem_op_list);
++static DEFINE_SPINLOCK(zcache_rem_op_list_lock);
++
+ #if 0
+ /* this is more aggressive but may cause other problems? */
+ #define ZCACHE_GFP_MASK	(GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN)
+@@ -98,20 +157,6 @@ static inline bool is_local_client(struct zcache_client *cli)
+  * read or written unless the zbpg's lock is held.
+  */
+ 
+-#define ZBH_SENTINEL  0x43214321
+-#define ZBPG_SENTINEL  0xdeadbeef
+-
+-#define ZBUD_MAX_BUDS 2
+-
+-struct zbud_hdr {
+-	uint16_t client_id;
+-	uint16_t pool_id;
+-	struct tmem_oid oid;
+-	uint32_t index;
+-	uint16_t size; /* compressed size in bytes, zero means unused */
+-	DECL_SENTINEL
+-};
+-
+ struct zbud_page {
+ 	struct list_head bud_list;
+ 	spinlock_t lock;
+@@ -153,8 +198,37 @@ static unsigned long zcache_zbud_curr_zbytes;
+ static unsigned long zcache_zbud_cumul_zpages;
+ static unsigned long zcache_zbud_cumul_zbytes;
+ static unsigned long zcache_compress_poor;
++static unsigned long zcache_policy_percent_exceeded;
+ static unsigned long zcache_mean_compress_poor;
+ 
++/*
++ * RAMster counters
++ * - Remote pages are pages with a local pampd but the data is remote
++ * - Foreign pages are pages stored locally but belonging to another node
++ */
++static atomic_t ramster_remote_pers_pages = ATOMIC_INIT(0);
++static unsigned long ramster_pers_remotify_enable;
++static unsigned long ramster_eph_remotify_enable;
++static unsigned long ramster_eph_pages_remoted;
++static unsigned long ramster_eph_pages_remote_failed;
++static unsigned long ramster_pers_pages_remoted;
++static unsigned long ramster_pers_pages_remote_failed;
++static unsigned long ramster_pers_pages_remote_nomem;
++static unsigned long ramster_remote_objects_flushed;
++static unsigned long ramster_remote_object_flushes_failed;
++static unsigned long ramster_remote_pages_flushed;
++static unsigned long ramster_remote_page_flushes_failed;
++static unsigned long ramster_remote_eph_pages_succ_get;
++static unsigned long ramster_remote_pers_pages_succ_get;
++static unsigned long ramster_remote_eph_pages_unsucc_get;
++static unsigned long ramster_remote_pers_pages_unsucc_get;
++static atomic_t ramster_curr_flnode_count = ATOMIC_INIT(0);
++static unsigned long ramster_curr_flnode_count_max;
++static atomic_t ramster_foreign_eph_pampd_count = ATOMIC_INIT(0);
++static unsigned long ramster_foreign_eph_pampd_count_max;
++static atomic_t ramster_foreign_pers_pampd_count = ATOMIC_INIT(0);
++static unsigned long ramster_foreign_pers_pampd_count_max;
++
+ /* forward references */
+ static void *zcache_get_free_page(void);
+ static void zcache_free_page(void *p);
+@@ -210,6 +284,29 @@ static char *zbud_data(struct zbud_hdr *zh, unsigned size)
+ 	return p;
+ }
+ 
++static void zbud_copy_from_pampd(char *data, size_t *size, struct zbud_hdr *zh)
++{
++	struct zbud_page *zbpg;
++	char *p;
++	unsigned budnum;
++
++	ASSERT_SENTINEL(zh, ZBH);
++	budnum = zbud_budnum(zh);
++	zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
++	spin_lock(&zbpg->lock);
++	BUG_ON(zh->size > *size);
++	p = (char *)zbpg;
++	if (budnum == 0)
++		p += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) &
++							CHUNK_MASK);
++	else if (budnum == 1)
++		p += PAGE_SIZE - ((zh->size + CHUNK_SIZE - 1) & CHUNK_MASK);
++	/* client should be filled in by caller */
++	memcpy(data, p, zh->size);
++	*size = zh->size;
++	spin_unlock(&zbpg->lock);
++}
++
+ /*
+  * zbud raw page management
+  */
+@@ -299,6 +396,7 @@ static void zbud_free_and_delist(struct zbud_hdr *zh)
+ 	struct zbud_page *zbpg =
+ 		container_of(zh, struct zbud_page, buddy[budnum]);
+ 
++	BUG_ON(!irqs_disabled());
+ 	spin_lock(&zbpg->lock);
+ 	if (list_empty(&zbpg->bud_list)) {
+ 		/* ignore zombie page... see zbud_evict_pages() */
+@@ -358,8 +456,13 @@ static struct zbud_hdr *zbud_create(uint16_t client_id, uint16_t pool_id,
+ 	if (unlikely(zbpg == NULL))
+ 		goto out;
+ 	/* ok, have a page, now compress the data before taking locks */
++#if 1 /* 110721 FIX LOCK ORDERING TO ELIMINATE DEADLOCK */
++	spin_lock(&zbud_budlists_spinlock);
++	spin_lock(&zbpg->lock);
++#else
+ 	spin_lock(&zbpg->lock);
+ 	spin_lock(&zbud_budlists_spinlock);
++#endif
+ 	list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list);
+ 	zbud_unbuddied[nchunks].count++;
+ 	zh = &zbpg->buddy[0];
+@@ -389,12 +492,19 @@ init_zh:
+ 	zh->oid = *oid;
+ 	zh->pool_id = pool_id;
+ 	zh->client_id = client_id;
++#if 1 /* 110721 FIX LOCK ORDERING TO ELIMINATE DEADLOCK */
++	to = zbud_data(zh, size);
++	memcpy(to, cdata, size);
++	spin_unlock(&zbpg->lock);
++	spin_unlock(&zbud_budlists_spinlock);
++#else
+ 	/* can wait to copy the data until the list locks are dropped */
+ 	spin_unlock(&zbud_budlists_spinlock);
+ 
+ 	to = zbud_data(zh, size);
+ 	memcpy(to, cdata, size);
+ 	spin_unlock(&zbpg->lock);
++#endif
+ 	zbud_cumul_chunk_counts[nchunks]++;
+ 	atomic_inc(&zcache_zbud_curr_zpages);
+ 	zcache_zbud_cumul_zpages++;
+@@ -458,6 +568,7 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg)
+ 	uint32_t index[ZBUD_MAX_BUDS];
+ 	struct tmem_oid oid[ZBUD_MAX_BUDS];
+ 	struct tmem_pool *pool;
++	unsigned long flags;
+ 
+ 	ASSERT_SPINLOCK(&zbpg->lock);
+ 	BUG_ON(!list_empty(&zbpg->bud_list));
+@@ -474,9 +585,12 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg)
+ 	}
+ 	spin_unlock(&zbpg->lock);
+ 	for (i = 0; i < j; i++) {
++		/* FIXME FIXME this just evicts local ephemeral pages!!! */
+ 		pool = zcache_get_pool_by_id(client_id[i], pool_id[i]);
+ 		if (pool != NULL) {
++			local_irq_save(flags);
+ 			tmem_flush_page(pool, &oid[i], index[i]);
++			local_irq_restore(flags);
+ 			zcache_put_pool(pool);
+ 		}
+ 	}
+@@ -496,7 +610,7 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg)
+ static void zbud_evict_pages(int nr)
+ {
+ 	struct zbud_page *zbpg;
+-	int i;
++	int i, newly_unused_pages = 0;
+ 
+ 	/* first try freeing any pages on unused list */
+ retry_unused_list:
+@@ -512,7 +626,7 @@ retry_unused_list:
+ 		zcache_free_page(zbpg);
+ 		zcache_evicted_raw_pages++;
+ 		if (--nr <= 0)
+-			goto out;
++			goto done;
+ 		goto retry_unused_list;
+ 	}
+ 	spin_unlock_bh(&zbpg_unused_list_spinlock);
+@@ -534,9 +648,10 @@ retry_unbud_list_i:
+ 			zcache_evicted_unbuddied_pages++;
+ 			/* want budlists unlocked when doing zbpg eviction */
+ 			zbud_evict_zbpg(zbpg);
++			newly_unused_pages++;
+ 			local_bh_enable();
+ 			if (--nr <= 0)
+-				goto out;
++				goto evict_unused;
+ 			goto retry_unbud_list_i;
+ 		}
+ 		spin_unlock_bh(&zbud_budlists_spinlock);
+@@ -547,7 +662,7 @@ retry_bud_list:
+ 	spin_lock_bh(&zbud_budlists_spinlock);
+ 	if (list_empty(&zbud_buddied_list)) {
+ 		spin_unlock_bh(&zbud_budlists_spinlock);
+-		goto out;
++		goto evict_unused;
+ 	}
+ 	list_for_each_entry(zbpg, &zbud_buddied_list, bud_list) {
+ 		if (unlikely(!spin_trylock(&zbpg->lock)))
+@@ -558,16 +673,362 @@ retry_bud_list:
+ 		zcache_evicted_buddied_pages++;
+ 		/* want budlists unlocked when doing zbpg eviction */
+ 		zbud_evict_zbpg(zbpg);
++		newly_unused_pages++;
+ 		local_bh_enable();
+ 		if (--nr <= 0)
+-			goto out;
++			goto evict_unused;
+ 		goto retry_bud_list;
+ 	}
+ 	spin_unlock_bh(&zbud_budlists_spinlock);
++
++evict_unused:
++	/*
++	 * zbud_evict_zbpg just moves pages on the unused list, it doesn't
++	 * free them so we need to actually free them here.
++	 */
++	spin_lock_bh(&zbpg_unused_list_spinlock);
++	if (!list_empty(&zbpg_unused_list) && newly_unused_pages--) {
++		/* can't walk list here, since it may change when unlocked */
++		zbpg = list_first_entry(&zbpg_unused_list,
++				struct zbud_page, bud_list);
++		list_del_init(&zbpg->bud_list);
++		zcache_zbpg_unused_list_count--;
++		atomic_dec(&zcache_zbud_curr_raw_pages);
++		spin_unlock_bh(&zbpg_unused_list_spinlock);
++		zcache_free_page(zbpg);
++		goto evict_unused;
++	}
++	spin_unlock_bh(&zbpg_unused_list_spinlock);
++done:
++	return;
++}
++
++static DEFINE_PER_CPU(unsigned char *, zcache_remoteputmem);
++
++/* only doing unbuddied for now, so only one remote put per zbpg */
++static int zbud_remotify_zbpg(struct zbud_page *zbpg)
++{
++	struct zbud_hdr *zh;
++	struct tmem_xhandle xh;
++	struct tmem_pool *pool;
++	bool ephemeral;
++	char *data;
++	size_t size;
++	int remotenode, ret = -1;
++	unsigned long flags;
++	unsigned char cksum;
++	char *p;
++	int i;
++	unsigned char *tmpmem = __get_cpu_var(zcache_remoteputmem);
++
++	ASSERT_SPINLOCK(&zbpg->lock);
++	BUG_ON(!list_empty(&zbpg->bud_list));
++
++	if (zbpg->buddy[0].size == 0)
++		zh = &zbpg->buddy[1];
++	else if (zbpg->buddy[1].size == 0)
++		zh = &zbpg->buddy[0];
++	else
++		BUG(); /* apparently NOT unbuddied ?!? */
++
++	/* don't remotify pages that are already remotified */
++	if (zh->client_id != LOCAL_CLIENT) {
++		spin_unlock(&zbpg->lock);
++		ret = 0;
++		preempt_enable();
++		goto out;
++	}
++	xh.client_id = zh->client_id;
++	xh.pool_id = zh->pool_id;
++	xh.oid = zh->oid;
++	xh.index = zh->index;
++	size = zh->size;
++	data = zbud_data(zh, size);
++	for (p = data, cksum = 0, i = 0; i < size; i++)
++		cksum += *p;
++	memcpy(tmpmem, data, size);
++	data = tmpmem;
++	spin_unlock(&zbpg->lock);
++	preempt_enable();	/* no locks held anymore */
++	pool = zcache_get_pool_by_id(zh->client_id, zh->pool_id);
++	BUG_ON(pool == NULL);
++	ephemeral = !pool->persistent;
++	zcache_put_pool(pool);
++	ret = ramster_remote_put(&xh, data, size, ephemeral, &remotenode);
++	if (ret == 0) {
++		/* data was successfully remoted so change the local version
++		 * to point to the remote node where it landed */
++		struct tmem_pool *pool;
++		pool = zcache_get_pool_by_id(LOCAL_CLIENT, xh.pool_id);
++		BUG_ON(pool == NULL);
++		local_irq_save(flags);
++		(void)tmem_replace(pool, &xh.oid, xh.index,
++			pampd_make_remote(remotenode, size, cksum));
++		local_irq_restore(flags);
++		zcache_put_pool(pool);
++		ramster_eph_pages_remoted++;
++		ret = 1;
++	} else
++		ramster_eph_pages_remote_failed++;
++
++out:
++	return ret;
++}
++
++void zbud_remotify_pages(int nr)
++{
++	struct zbud_page *zbpg;
++	int i, ret;
++
++	/*
++	 * for now just try remotifying unbuddied pages, starting with
++	 * least space avail
++	 */
++	for (i = 0; i < MAX_CHUNK; i++) {
++retry_unbud_list_i:
++		preempt_disable();  /* enable in zbud_remotify_zbpg */
++		spin_lock_bh(&zbud_budlists_spinlock);
++		if (list_empty(&zbud_unbuddied[i].list)) {
++			spin_unlock_bh(&zbud_budlists_spinlock);
++			continue;
++		}
++		list_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) {
++			if (unlikely(!spin_trylock(&zbpg->lock))) {
++				spin_unlock_bh(&zbud_budlists_spinlock);
++				preempt_enable();
++				continue;
++			}
++			list_del_init(&zbpg->bud_list);
++			zbud_unbuddied[i].count--;
++			spin_unlock(&zbud_budlists_spinlock);
++			/* want budlists unlocked when doing zbpg remotify */
++			local_bh_enable();
++			ret = zbud_remotify_zbpg(zbpg);
++			/* preemption is now re-enabled */
++			if (ret == 0)
++				BUG();
++			else if (ret == 1)
++				--nr;
++			else {
++				/* if fail to remotify any page, quit */
++pr_err("TESTING zbud_remotify_pages failed on page, trying to re-add\n");
++				spin_lock_bh(&zbud_budlists_spinlock);
++				spin_lock(&zbpg->lock);
++				list_add_tail(&zbpg->bud_list,
++					&zbud_unbuddied[i].list);
++				zbud_unbuddied[i].count++;
++				spin_unlock(&zbpg->lock);
++				spin_unlock_bh(&zbud_budlists_spinlock);
++pr_err("TESTING zbud_remotify_pages failed on page, finished re-add\n");
++				goto out;
++			}
++			if (nr <= 0)
++				goto out;
++			goto retry_unbud_list_i;
++		}
++		spin_unlock_bh(&zbud_budlists_spinlock);
++	}
+ out:
+ 	return;
+ }
+ 
++/* the "flush list" asynchronously collects pages to remotely flush */
++#define FLUSH_ENTIRE_OBJECT ((uint32_t)-1)
++static void ramster_flnode_free(struct flushlist_node *,
++				struct tmem_pool *);
++
++static void zcache_remote_flush_page(struct flushlist_node *flnode)
++{
++	struct tmem_xhandle *xh;
++	int remotenode, ret;
++
++	preempt_disable();
++	xh = &flnode->xh;
++	remotenode = flnode->xh.client_id;
++	ret = ramster_remote_flush(xh, remotenode);
++	if (ret >= 0)
++		ramster_remote_pages_flushed++;
++	else
++		ramster_remote_page_flushes_failed++;
++	preempt_enable_no_resched();
++	ramster_flnode_free(flnode, NULL);
++}
++
++static void zcache_remote_flush_object(struct flushlist_node *flnode)
++{
++	struct tmem_xhandle *xh;
++	int remotenode, ret;
++
++	preempt_disable();
++	xh = &flnode->xh;
++	remotenode = flnode->xh.client_id;
++	ret = ramster_remote_flush_object(xh, remotenode);
++	if (ret >= 0)
++		ramster_remote_objects_flushed++;
++	else
++		ramster_remote_object_flushes_failed++;
++	preempt_enable_no_resched();
++	ramster_flnode_free(flnode, NULL);
++}
++
++static void zcache_remote_eph_put(struct zbud_hdr *zbud)
++{
++	/* FIXME */
++}
++
++static void zcache_remote_pers_put(struct zv_hdr *zv)
++{
++	struct tmem_xhandle xh;
++	uint16_t size;
++	bool ephemeral;
++	int remotenode, ret = -1;
++	char *data;
++	struct tmem_pool *pool;
++	unsigned long flags;
++	unsigned char cksum;
++	char *p;
++	int i;
++	unsigned char *tmpmem = __get_cpu_var(zcache_remoteputmem);
++
++	ASSERT_SENTINEL(zv, ZVH);
++	BUG_ON(zv->client_id != LOCAL_CLIENT);
++	local_bh_disable();
++	xh.client_id = zv->client_id;
++	xh.pool_id = zv->pool_id;
++	xh.oid = zv->oid;
++	xh.index = zv->index;
++	size = xv_get_object_size(zv) - sizeof(*zv);
++	BUG_ON(size == 0 || size > zv_max_page_size);
++	data = (char *)zv + sizeof(*zv);
++	for (p = data, cksum = 0, i = 0; i < size; i++)
++		cksum += *p;
++	memcpy(tmpmem, data, size);
++	data = tmpmem;
++	pool = zcache_get_pool_by_id(zv->client_id, zv->pool_id);
++	ephemeral = is_ephemeral(pool);
++	zcache_put_pool(pool);
++	/* now OK to release lock set in caller */
++	spin_unlock(&zcache_rem_op_list_lock);
++	local_bh_enable();
++	preempt_disable();
++	ret = ramster_remote_put(&xh, data, size, ephemeral, &remotenode);
++	preempt_enable_no_resched();
++	if (ret != 0) {
++		/*
++		 * This is some form of a memory leak... if the remote put
++		 * fails, there will never be another attempt to remotify
++		 * this page.  But since we've dropped the zv pointer,
++		 * the page may have been freed or the data replaced
++		 * so we can't just "put it back" in the remote op list.
++		 * Even if we could, not sure where to put it in the list
++		 * because there may be flushes that must be strictly
++		 * ordered vs the put.  So leave this as a FIXME for now.
++		 * But count them so we know if it becomes a problem.
++		 */
++		ramster_pers_pages_remote_failed++;
++		goto out;
++	} else
++		atomic_inc(&ramster_remote_pers_pages);
++	ramster_pers_pages_remoted++;
++	/*
++	 * data was successfully remoted so change the local version to
++	 * point to the remote node where it landed
++	 */
++	local_bh_disable();
++	pool = zcache_get_pool_by_id(LOCAL_CLIENT, xh.pool_id);
++	local_irq_save(flags);
++	(void)tmem_replace(pool, &xh.oid, xh.index,
++			pampd_make_remote(remotenode, size, cksum));
++	local_irq_restore(flags);
++	zcache_put_pool(pool);
++	local_bh_enable();
++out:
++	return;
++}
++
++/*
++ * Dequeue and run at most nr ops from the head of zcache_rem_op_list.  For
++ * PERS_PUT the list lock is left held so that the handler can release it.
++ */
++static void zcache_do_remotify_ops(int nr)
++{
++	struct ramster_remotify_hdr *rem_op;
++	union remotify_list_node *u;
++
++	while (nr-- > 0) {	/* count down so the caller's cap is honored */
++		spin_lock(&zcache_rem_op_list_lock);
++		if (list_empty(&zcache_rem_op_list)) {
++			spin_unlock(&zcache_rem_op_list_lock);
++			break;
++		}
++		rem_op = list_first_entry(&zcache_rem_op_list,
++				struct ramster_remotify_hdr, list);
++		list_del_init(&rem_op->list);
++		if (rem_op->op != RAMSTER_REMOTIFY_PERS_PUT)
++			spin_unlock(&zcache_rem_op_list_lock);
++		u = (union remotify_list_node *)rem_op;
++		switch (rem_op->op) {
++		case RAMSTER_REMOTIFY_EPH_PUT:
++BUG();
++			zcache_remote_eph_put((struct zbud_hdr *)rem_op);
++			break;
++		case RAMSTER_REMOTIFY_PERS_PUT:
++			zcache_remote_pers_put((struct zv_hdr *)rem_op);
++			break;
++		case RAMSTER_REMOTIFY_FLUSH_PAGE:
++			zcache_remote_flush_page((struct flushlist_node *)u);
++			break;
++		case RAMSTER_REMOTIFY_FLUSH_OBJ:
++			zcache_remote_flush_object((struct flushlist_node *)u);
++			break;
++		default:
++			BUG();
++		}
++	}
++}
++
++/*
++ * For now, just push over a few pages every few seconds to
++ * ensure that it basically works
++ */
++static struct workqueue_struct *ramster_remotify_workqueue;
++static void ramster_remotify_process(struct work_struct *work);
++static DECLARE_DELAYED_WORK(ramster_remotify_worker,
++		ramster_remotify_process);
++
++static void ramster_remotify_queue_delayed_work(unsigned long delay)
++{
++	if (!queue_delayed_work(ramster_remotify_workqueue,
++				&ramster_remotify_worker, delay))
++		pr_err("ramster_remotify: bad workqueue\n"); /* call returned 0 */
++}
++
++
++static int use_frontswap;
++static int use_cleancache;
++static void ramster_remotify_process(struct work_struct *work)
++{
++	BUG_ON(irqs_disabled());
++	ramster_remotify_queue_delayed_work(10 * HZ);	/* re-arm before working */
++#ifdef CONFIG_FRONTSWAP
++	if (use_frontswap && ramster_pers_remotify_enable)
++		zcache_do_remotify_ops(500); /* FIXME is this a good number? */
++#endif
++#ifdef CONFIG_CLEANCACHE
++	if (use_cleancache && ramster_eph_remotify_enable)
++		zbud_remotify_pages(100); /* presumably a per-pass page limit */
++#endif
++}
++
++/* create the remotify workqueue and schedule the first pass in 60 seconds */
++static void ramster_remotify_init(void)
++{
++	ramster_remotify_workqueue =
++		create_singlethread_workqueue("ramster_remotify");
++	if (ramster_remotify_workqueue != NULL)	/* creation can fail */
++		ramster_remotify_queue_delayed_work(60UL * HZ);
++}
++
+ static void zbud_init(void)
+ {
+ 	int i;
+@@ -631,15 +1092,6 @@ static int zbud_show_cumul_chunk_counts(char *buf)
+  * necessary for decompression) immediately preceding the compressed data.
+  */
+ 
+-#define ZVH_SENTINEL  0x43214321
+-
+-struct zv_hdr {
+-	uint32_t pool_id;
+-	struct tmem_oid oid;
+-	uint32_t index;
+-	DECL_SENTINEL
+-};
+-
+ /* rudimentary policy limits */
+ /* total number of persistent pages may not exceed this percentage */
+ static unsigned int zv_page_count_policy_percent = 75;
+@@ -658,7 +1110,7 @@ static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;
+ static unsigned long zv_curr_dist_counts[NCHUNKS];
+ static unsigned long zv_cumul_dist_counts[NCHUNKS];
+ 
+-static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
++static struct zv_hdr *zv_create(struct zcache_client *cli, uint32_t pool_id,
+ 				struct tmem_oid *oid, uint32_t index,
+ 				void *cdata, unsigned clen)
+ {
+@@ -671,7 +1123,7 @@ static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
+ 
+ 	BUG_ON(!irqs_disabled());
+ 	BUG_ON(chunks >= NCHUNKS);
+-	ret = xv_malloc(xvpool, alloc_size,
++	ret = xv_malloc(cli->xvpool, clen + sizeof(struct zv_hdr),
+ 			&page, &offset, ZCACHE_GFP_MASK);
+ 	if (unlikely(ret))
+ 		goto out;
+@@ -682,12 +1134,50 @@ static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
+ 	zv->oid = *oid;
+ 	zv->pool_id = pool_id;
+ 	SET_SENTINEL(zv, ZVH);
++	INIT_LIST_HEAD(&zv->rem_op.list);
++	zv->client_id = get_client_id_from_client(cli);
++	zv->rem_op.op = RAMSTER_REMOTIFY_PERS_PUT;
++	if (zv->client_id == LOCAL_CLIENT) {
++		spin_lock(&zcache_rem_op_list_lock);
++		list_add_tail(&zv->rem_op.list, &zcache_rem_op_list);
++		spin_unlock(&zcache_rem_op_list_lock);
++	}
+ 	memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen);
+ 	kunmap_atomic(zv, KM_USER0);
+ out:
+ 	return zv;
+ }
+ 
++/* zv_create's sibling: reserve clen bytes for a page that has no data yet */
++static struct zv_hdr *zv_alloc(struct tmem_pool *pool,
++				struct tmem_oid *oid, uint32_t index,
++				unsigned clen)
++{
++	struct zcache_client *client = pool->client;
++	struct zv_hdr *hdr;
++	struct page *pg;
++	uint32_t off;
++
++	BUG_ON(!irqs_disabled());
++	BUG_ON(!is_local_client(pool->client));
++	/* a nonzero return from xv_malloc means nothing was reserved */
++	if (unlikely(xv_malloc(client->xvpool, clen + sizeof(struct zv_hdr),
++				&pg, &off, ZCACHE_GFP_MASK)))
++		return NULL;
++	hdr = kmap_atomic(pg, KM_USER0) + off;
++	/* record which page this reserved space belongs to */
++	hdr->pool_id = pool->pool_id;
++	hdr->oid = *oid;
++	hdr->index = index;
++	hdr->client_id = LOCAL_CLIENT;
++	/* not put on the remotify list; op marks the header as in transit */
++	INIT_LIST_HEAD(&hdr->rem_op.list);
++	hdr->rem_op.op = RAMSTER_INTRANSIT_PERS;
++	SET_SENTINEL(hdr, ZVH);
++	kunmap_atomic(hdr, KM_USER0);
++	return hdr;
++}
++
+ static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)
+ {
+ 	unsigned long flags;
+@@ -700,8 +1190,13 @@ static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)
+ 	BUG_ON(chunks >= NCHUNKS);
+ 	zv_curr_dist_counts[chunks]--;
+ 	size -= sizeof(*zv);
++	spin_lock(&zcache_rem_op_list_lock);
++	size = xv_get_object_size(zv) - sizeof(*zv);
+ 	BUG_ON(size == 0);
+ 	INVERT_SENTINEL(zv, ZVH);
++	if (!list_empty(&zv->rem_op.list))
++		list_del_init(&zv->rem_op.list);
++	spin_unlock(&zcache_rem_op_list_lock);
+ 	page = virt_to_page(zv);
+ 	offset = (unsigned long)zv & ~PAGE_MASK;
+ 	local_irq_save(flags);
+@@ -727,6 +1222,29 @@ static void zv_decompress(struct page *page, struct zv_hdr *zv)
+ 	BUG_ON(clen != PAGE_SIZE);
+ }
+ 
++/* copy zv's stored bytes to data; *bufsize is capacity in, bytes copied out */
++static void zv_copy_from_pampd(char *data, size_t *bufsize, struct zv_hdr *zv)
++{
++	unsigned len;
++
++	ASSERT_SENTINEL(zv, ZVH);
++	len = xv_get_object_size(zv) - sizeof(*zv);
++	BUG_ON(len == 0 || len > zv_max_page_size || len > *bufsize);
++	memcpy(data, (char *)zv + sizeof(*zv), len);
++	*bufsize = len;
++}
++
++/* fill the space behind zv's header with exactly size bytes from data */
++static void zv_copy_to_pampd(struct zv_hdr *zv, char *data, size_t size)
++{
++	unsigned avail;
++
++	ASSERT_SENTINEL(zv, ZVH);
++	avail = xv_get_object_size(zv) - sizeof(*zv);
++	BUG_ON(avail != size || avail == 0 || avail > zv_max_page_size);
++	memcpy((char *)zv + sizeof(*zv), data, size);
++}
++
+ #ifdef CONFIG_SYSFS
+ /*
+  * show a distribution of compression stats for zv pages.
+@@ -979,6 +1497,7 @@ static DEFINE_SPINLOCK(zcache_direct_reclaim_lock);
+  */
+ static struct kmem_cache *zcache_objnode_cache;
+ static struct kmem_cache *zcache_obj_cache;
++static struct kmem_cache *ramster_flnode_cache;
+ static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0);
+ static unsigned long zcache_curr_obj_count_max;
+ static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0);
+@@ -994,6 +1513,7 @@ struct zcache_preload {
+ 	struct tmem_obj *obj;
+ 	int nr;
+ 	struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH];
++	struct flushlist_node *flnode;
+ };
+ static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };
+ 
+@@ -1002,6 +1522,7 @@ static int zcache_do_preload(struct tmem_pool *pool)
+ 	struct zcache_preload *kp;
+ 	struct tmem_objnode *objnode;
+ 	struct tmem_obj *obj;
++	struct flushlist_node *flnode;
+ 	void *page;
+ 	int ret = -ENOMEM;
+ 
+@@ -1009,10 +1530,6 @@ static int zcache_do_preload(struct tmem_pool *pool)
+ 		goto out;
+ 	if (unlikely(zcache_obj_cache == NULL))
+ 		goto out;
+-	if (!spin_trylock(&zcache_direct_reclaim_lock)) {
+-		zcache_aborted_preload++;
+-		goto out;
+-	}
+ 	preempt_disable();
+ 	kp = &__get_cpu_var(zcache_preloads);
+ 	while (kp->nr < ARRAY_SIZE(kp->objnodes)) {
+@@ -1036,6 +1553,11 @@ static int zcache_do_preload(struct tmem_pool *pool)
+ 		zcache_failed_alloc++;
+ 		goto unlock_out;
+ 	}
++	flnode = kmem_cache_alloc(ramster_flnode_cache, ZCACHE_GFP_MASK);
++	if (unlikely(flnode == NULL)) {
++		zcache_failed_alloc++;
++		goto unlock_out;
++	}
+ 	page = (void *)__get_free_page(ZCACHE_GFP_MASK);
+ 	if (unlikely(page == NULL)) {
+ 		zcache_failed_get_free_pages++;
+@@ -1048,17 +1570,40 @@ static int zcache_do_preload(struct tmem_pool *pool)
+ 		kp->obj = obj;
+ 	else
+ 		kmem_cache_free(zcache_obj_cache, obj);
++	if (kp->flnode == NULL)
++		kp->flnode = flnode;
++	else
++		kmem_cache_free(ramster_flnode_cache, flnode);
+ 	if (kp->page == NULL)
+ 		kp->page = page;
+ 	else
+ 		free_page((unsigned long)page);
+ 	ret = 0;
+ unlock_out:
+-	spin_unlock(&zcache_direct_reclaim_lock);
+ out:
+ 	return ret;
+ }
+ 
++/* make sure this cpu has a flushlist_node preloaded; returns 0 on success */
++static int ramster_do_preload_flnode_only(struct tmem_pool *pool)
++{
++	struct zcache_preload *kp;
++	struct flushlist_node *flnode;
++
++	BUG_ON(!irqs_disabled());
++	BUG_ON(ramster_flnode_cache == NULL);
++	kp = &__get_cpu_var(zcache_preloads);
++	/* already preloaded: nothing to allocate, and no NULL to free */
++	if (kp->flnode != NULL)
++		return 0;
++	flnode = kmem_cache_alloc(ramster_flnode_cache, GFP_ATOMIC);
++	if (unlikely(flnode == NULL))
++		BUG();  /* FIXME handle more gracefully, but how??? */
++	kp->flnode = flnode;
++	/* the only way to get here is with a node in place, so report success */
++	return 0;
++}
++
+ static void *zcache_get_free_page(void)
+ {
+ 	struct zcache_preload *kp;
+@@ -1131,6 +1676,30 @@ static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
+ 	kmem_cache_free(zcache_obj_cache, obj);
+ }
+ 
++/* take the flushlist_node preloaded on this cpu; BUGs if none is there */
++static struct flushlist_node *ramster_flnode_alloc(struct tmem_pool *pool)
++{
++	struct zcache_preload *kp = &__get_cpu_var(zcache_preloads);
++	struct flushlist_node *node = kp->flnode;
++	int in_use;
++
++	BUG_ON(node == NULL);
++	kp->flnode = NULL;
++	/* account for the node and keep the high-water mark current */
++	in_use = atomic_inc_return(&ramster_curr_flnode_count);
++	if (ramster_curr_flnode_count_max < in_use)
++		ramster_curr_flnode_count_max = in_use;
++	return node;
++}
++
++static void ramster_flnode_free(struct flushlist_node *flnode,
++				struct tmem_pool *pool)
++{
++	atomic_dec(&ramster_curr_flnode_count);	/* one fewer node in use */
++	BUG_ON(atomic_read(&ramster_curr_flnode_count) < 0);
++	kmem_cache_free(ramster_flnode_cache, flnode);	/* pool is not used */
++}
++
+ static struct tmem_hostops zcache_hostops = {
+ 	.obj_alloc = zcache_obj_alloc,
+ 	.obj_free = zcache_obj_free,
+@@ -1150,22 +1719,20 @@ static unsigned long zcache_curr_pers_pampd_count_max;
+ /* forward reference */
+ static int zcache_compress(struct page *from, void **out_va, size_t *out_len);
+ 
+-static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,
++static int zcache_pampd_eph_create(char *data, size_t size, bool raw,
+ 				struct tmem_pool *pool, struct tmem_oid *oid,
+-				 uint32_t index)
++				uint32_t index, void **pampd)
+ {
+-	void *pampd = NULL, *cdata;
+-	size_t clen;
+-	int ret;
+-	unsigned long count;
+-	struct page *page = (struct page *)(data);
++	int ret = -1;
++	void *cdata = data;
++	size_t clen = size;
+ 	struct zcache_client *cli = pool->client;
+ 	uint16_t client_id = get_client_id_from_client(cli);
+-	unsigned long zv_mean_zsize;
+-	unsigned long curr_pers_pampd_count;
+-	u64 total_zsize;
++	struct page *page = NULL;
++	unsigned long count;
+ 
+-	if (eph) {
++	if (!raw) {
++		page = virt_to_page(data);
+ 		ret = zcache_compress(page, &cdata, &clen);
+ 		if (ret == 0)
+ 			goto out;
+@@ -1173,46 +1740,137 @@ static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,
+ 			zcache_compress_poor++;
+ 			goto out;
+ 		}
+-		pampd = (void *)zbud_create(client_id, pool->pool_id, oid,
+-						index, page, cdata, clen);
+-		if (pampd != NULL) {
+-			count = atomic_inc_return(&zcache_curr_eph_pampd_count);
+-			if (count > zcache_curr_eph_pampd_count_max)
+-				zcache_curr_eph_pampd_count_max = count;
+-		}
+-	} else {
+-		curr_pers_pampd_count =
+-			atomic_read(&zcache_curr_pers_pampd_count);
+-		if (curr_pers_pampd_count >
+-		    (zv_page_count_policy_percent * totalram_pages) / 100)
+-			goto out;
+-		ret = zcache_compress(page, &cdata, &clen);
+-		if (ret == 0)
+-			goto out;
+-		/* reject if compression is too poor */
+-		if (clen > zv_max_zsize) {
+-			zcache_compress_poor++;
++	}
++	*pampd = (void *)zbud_create(client_id, pool->pool_id, oid,
++					index, page, cdata, clen);
++	if (*pampd == NULL) {
++		ret = -ENOMEM;
++		goto out;
++	}
++	ret = 0;
++	count = atomic_inc_return(&zcache_curr_eph_pampd_count);
++	if (count > zcache_curr_eph_pampd_count_max)
++		zcache_curr_eph_pampd_count_max = count;
++	if (client_id != LOCAL_CLIENT) {
++		count = atomic_inc_return(&ramster_foreign_eph_pampd_count);
++		if (count > ramster_foreign_eph_pampd_count_max)
++			ramster_foreign_eph_pampd_count_max = count;
++	}
++out:
++	return ret;
++}
++
++static int zcache_pampd_pers_create(char *data, size_t size, bool raw,
++				struct tmem_pool *pool, struct tmem_oid *oid,
++				uint32_t index, void **pampd)
++{
++	int ret = -1;
++	void *cdata = data;
++	size_t clen = size;
++	struct zcache_client *cli = pool->client;
++	struct page *page;
++	unsigned long count;
++	unsigned long zv_mean_zsize;
++	struct zv_hdr *zv;
++	long curr_pers_pampd_count;
++	u64 total_zsize;
++
++	curr_pers_pampd_count = atomic_read(&zcache_curr_pers_pampd_count) -
++			atomic_read(&ramster_remote_pers_pages);
++	/* total pers pampds minus remoted ones; warn if accounting goes negative */
++	WARN_ON_ONCE(curr_pers_pampd_count < 0);
++	if (curr_pers_pampd_count >
++		    (zv_page_count_policy_percent * totalram_pages) / 100) {
++		zcache_policy_percent_exceeded++;
++#if 0
++{
++static unsigned long cnt;
++cnt++;
++if (!(cnt&(cnt-1)))
++pr_err("TESTING zppc policy cnt=%lu, curr=%lu, limit=%lu, totalram=%lu\n",
++cnt, curr_pers_pampd_count,
++((zv_page_count_policy_percent * totalram_pages) / 100), totalram_pages);
++}
++#endif
++		goto out;
++	}
++	if (raw)
++		goto ok_to_create;
++	page = virt_to_page(data);
++	if (zcache_compress(page, &cdata, &clen) == 0)
++		goto out;
++	/* reject if compression is too poor */
++	if (clen > zv_max_zsize) {
++		zcache_compress_poor++;
++		goto out;
++	}
++	/* reject if mean compression is too poor */
++	if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) {
++		total_zsize = xv_get_total_size_bytes(cli->xvpool);
++		zv_mean_zsize = div_u64(total_zsize, curr_pers_pampd_count);
++		if (zv_mean_zsize > zv_max_mean_zsize) {
++			zcache_mean_compress_poor++;
+ 			goto out;
+ 		}
+-		/* reject if mean compression is too poor */
+-		if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) {
+-			total_zsize = xv_get_total_size_bytes(cli->xvpool);
+-			zv_mean_zsize = div_u64(total_zsize,
+-						curr_pers_pampd_count);
+-			if (zv_mean_zsize > zv_max_mean_zsize) {
+-				zcache_mean_compress_poor++;
+-				goto out;
+-			}
+-		}
+-		pampd = (void *)zv_create(cli->xvpool, pool->pool_id,
+-						oid, index, cdata, clen);
+-		if (pampd == NULL)
+-			goto out;
+-		count = atomic_inc_return(&zcache_curr_pers_pampd_count);
+-		if (count > zcache_curr_pers_pampd_count_max)
+-			zcache_curr_pers_pampd_count_max = count;
+ 	}
++ok_to_create:
++	*pampd = (void *)zv_create(cli, pool->pool_id, oid, index, cdata, clen);
++	if (*pampd == NULL) {
++		ret = -ENOMEM;
++		goto out;
++	}
++	ret = 0;
++	count = atomic_inc_return(&zcache_curr_pers_pampd_count);
++	if (count > zcache_curr_pers_pampd_count_max)
++		zcache_curr_pers_pampd_count_max = count;
++	if (is_local_client(cli))
++		goto out;
++	zv = *(struct zv_hdr **)pampd;	/* NOTE(review): zv is not read after this */
++	count = atomic_inc_return(&ramster_foreign_pers_pampd_count);
++	if (count > ramster_foreign_pers_pampd_count_max)
++		ramster_foreign_pers_pampd_count_max = count;
+ out:
++#if 0
++if (ret == -ENOMEM) {
++static unsigned long cnt, lclcnt, fgncnt;
++cnt++;
++if (is_local_client(pool->client))
++ lclcnt++;
++else
++ fgncnt++;
++if (!(cnt&(cnt-1)))
++pr_err("TESTING zcache_pampd_create_PERS ENOMEM cnt=%lu, local=%lu, foreign=%lu, tot pampd_count=%lu, remote=%lu\n", cnt, lclcnt, fgncnt, (long)atomic_read(&zcache_curr_pers_pampd_count), (long)atomic_read(&ramster_remote_pers_pages));
++} else if (ret < 0) {
++static unsigned long cnt, lclcnt, fgncnt;
++cnt++;
++if (is_local_client(pool->client))
++ lclcnt++;
++else
++ fgncnt++;
++if (!(cnt&(cnt-1)))
++pr_err("TESTING zcache_pampd_create_PERS POLICYFAIL cnt=%lu, local=%lu, foreign=%lu, tot pampd_count=%lu, remote=%lu\n", cnt, lclcnt, fgncnt, (long)atomic_read(&zcache_curr_pers_pampd_count), (long)atomic_read(&ramster_remote_pers_pages));
++}
++#endif
++	return ret;
++}
++
++static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,
++				struct tmem_pool *pool, struct tmem_oid *oid,
++				uint32_t index)
++{
++	void *pampd = NULL;	/* returned as-is; the helpers fill it in */
++	int ret;		/* assigned below but never read */
++	bool ephemeral;
++
++	BUG_ON(preemptible());
++	ephemeral = (eph == 1) || ((eph == 0) && is_ephemeral(pool));
++	if (ephemeral)
++		ret = zcache_pampd_eph_create(data, size, raw, pool,
++						oid, index, &pampd);
++	else
++		ret = zcache_pampd_pers_create(data, size, raw, pool,
++						oid, index, &pampd);
++	/* ret is dropped; FIXME add some counters here for failed creates? */
+ 	return pampd;
+ }
+ 
+@@ -1226,75 +1884,368 @@ static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw,
+ {
+ 	int ret = 0;
+ 
+-	BUG_ON(is_ephemeral(pool));
+-	zv_decompress((struct page *)(data), pampd);
++	BUG_ON(preemptible());
++	BUG_ON(is_ephemeral(pool)); /* Fix later for shared pools? */
++	BUG_ON(pampd_is_remote(pampd));
++	if (raw)
++		zv_copy_from_pampd(data, bufsize, pampd);
++	else
++		zv_decompress(virt_to_page(data), pampd);
+ 	return ret;
+ }
+ 
+-/*
+- * fill the pageframe corresponding to the struct page with the data
+- * from the passed pampd
+- */
+ static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw,
+ 					void *pampd, struct tmem_pool *pool,
+ 					struct tmem_oid *oid, uint32_t index)
+ {
+ 	int ret = 0;
++	unsigned long flags;
++	struct zcache_client *cli = pool->client;
+ 
+-	BUG_ON(!is_ephemeral(pool));
+-	zbud_decompress((struct page *)(data), pampd);
+-	zbud_free_and_delist((struct zbud_hdr *)pampd);
+-	atomic_dec(&zcache_curr_eph_pampd_count);
++	BUG_ON(preemptible());
++	BUG_ON(pampd_is_remote(pampd));
++	if (is_ephemeral(pool)) {
++		local_irq_save(flags);
++		if (raw)
++			zbud_copy_from_pampd(data, bufsize, pampd);
++		else
++			ret = zbud_decompress(virt_to_page(data), pampd);
++		zbud_free_and_delist((struct zbud_hdr *)pampd);
++		local_irq_restore(flags);
++		if (!is_local_client(cli)) {
++			atomic_dec(&ramster_foreign_eph_pampd_count);
++			WARN_ON_ONCE(atomic_read(&ramster_foreign_eph_pampd_count) < 0);
++		}
++		atomic_dec(&zcache_curr_eph_pampd_count);
++		WARN_ON_ONCE(atomic_read(&zcache_curr_eph_pampd_count) < 0);
++	} else {
++		if (is_local_client(cli))
++			BUG();	/* persistent get-and-free: foreign clients only */
++		if (raw)
++			zv_copy_from_pampd(data, bufsize, pampd);
++		else
++			zv_decompress(virt_to_page(data), pampd);
++		zv_free(cli->xvpool, pampd);
++		if (!is_local_client(cli)) {	/* always true after the BUG() above */
++			atomic_dec(&ramster_foreign_pers_pampd_count);
++			WARN_ON_ONCE(atomic_read(&ramster_foreign_pers_pampd_count) < 0);
++		}
++		atomic_dec(&zcache_curr_pers_pampd_count);
++		WARN_ON_ONCE(atomic_read(&zcache_curr_pers_pampd_count) < 0);
++		ret = 0;
++	}
+ 	return ret;
+ }
+ 
++static bool zcache_pampd_is_remote(void *pampd)
++{
++	return pampd_is_remote(pampd);	/* function form, used as a pamops hook */
++}
++
+ /*
+  * free the pampd and remove it from any zcache lists
+  * pampd must no longer be pointed to from any tmem data structures!
+  */
+ static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
+-				struct tmem_oid *oid, uint32_t index)
++			      struct tmem_oid *oid, uint32_t index, bool acct)
+ {
+ 	struct zcache_client *cli = pool->client;
+-
+-	if (is_ephemeral(pool)) {
++	bool eph = is_ephemeral(pool);
++	struct zv_hdr *zv;
++
++	BUG_ON(preemptible());
++	if (pampd_is_remote(pampd)) {
++		WARN_ON(acct == false);
++		if (oid == NULL) {
++			/*
++			 * a NULL oid means to ignore this pampd free
++			 * as the remote freeing will be handled elsewhere
++			 */
++		} else if (eph) {
++			/* FIXME remote flush optional but probably good idea */
++			/* FIXME get these working properly again */
++			atomic_dec(&zcache_curr_eph_pampd_count);
++			WARN_ON_ONCE(atomic_read(&zcache_curr_eph_pampd_count) < 0);
++		} else if (pampd_is_intransit(pampd)) {
++			/* did a pers remote get_and_free, so just free local */
++			pampd = pampd_mask_intransit_and_remote(pampd);
++			goto local_pers;
++		} else {
++			struct flushlist_node *flnode =
++				ramster_flnode_alloc(pool);
++
++			flnode->xh.client_id = pampd_remote_node(pampd);
++			flnode->xh.pool_id = pool->pool_id;
++			flnode->xh.oid = *oid;
++			flnode->xh.index = index;
++			flnode->rem_op.op = RAMSTER_REMOTIFY_FLUSH_PAGE;
++			spin_lock(&zcache_rem_op_list_lock);
++			list_add(&flnode->rem_op.list, &zcache_rem_op_list);
++			spin_unlock(&zcache_rem_op_list_lock);
++			atomic_dec(&zcache_curr_pers_pampd_count);
++			WARN_ON_ONCE(atomic_read(&zcache_curr_pers_pampd_count) < 0);
++			atomic_dec(&ramster_remote_pers_pages);
++			WARN_ON_ONCE(atomic_read(&ramster_remote_pers_pages) < 0);
++		}
++	} else if (eph) {
+ 		zbud_free_and_delist((struct zbud_hdr *)pampd);
+-		atomic_dec(&zcache_curr_eph_pampd_count);
+-		BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0);
++		if (!is_local_client(pool->client)) {
++			atomic_dec(&ramster_foreign_eph_pampd_count);
++			WARN_ON_ONCE(atomic_read(&ramster_foreign_eph_pampd_count) < 0);
++		}
++		if (acct)
++			atomic_dec(&zcache_curr_eph_pampd_count);
++			/* FIXME get these working again; NOTE: WARN below is NOT under if (acct) */
++			WARN_ON_ONCE(atomic_read(&zcache_curr_eph_pampd_count) < 0);
+ 	} else {
+-		zv_free(cli->xvpool, (struct zv_hdr *)pampd);
+-		atomic_dec(&zcache_curr_pers_pampd_count);
+-		BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0);
++local_pers:
++		zv = (struct zv_hdr *)pampd;
++		if (!is_local_client(pool->client)) {
++			atomic_dec(&ramster_foreign_pers_pampd_count);
++			WARN_ON_ONCE(atomic_read(&ramster_foreign_pers_pampd_count) < 0);
++		}
++		zv_free(cli->xvpool, zv);
++		if (acct)
++			atomic_dec(&zcache_curr_pers_pampd_count);
++		/* FIXME get these working properly again */
++		WARN_ON_ONCE(atomic_read(&zcache_curr_pers_pampd_count) < 0);
+ 	}
+ }
+ 
+-static void zcache_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj)
++static void zcache_pampd_free_obj(struct tmem_pool *pool,
++					struct tmem_obj *obj)
+ {
++	struct flushlist_node *flnode;
++
++	BUG_ON(preemptible());
++	if (obj->extra == NULL)	/* nothing recorded in extra: no remote flush */
++		return;
++	BUG_ON(!pampd_is_remote(obj->extra));
++	flnode = ramster_flnode_alloc(pool);	/* consumes the per-cpu preload */
++	flnode->xh.client_id = pampd_remote_node(obj->extra);
++	flnode->xh.pool_id = pool->pool_id;
++	flnode->xh.oid = obj->oid;
++	flnode->xh.index = FLUSH_ENTIRE_OBJECT;
++	flnode->rem_op.op = RAMSTER_REMOTIFY_FLUSH_OBJ;
++	spin_lock(&zcache_rem_op_list_lock);
++	list_add(&flnode->rem_op.list, &zcache_rem_op_list); /* at list head */
++	spin_unlock(&zcache_rem_op_list_lock);
+ }
+ 
+-static void zcache_pampd_new_obj(struct tmem_obj *obj)
++void zcache_pampd_new_obj(struct tmem_obj *obj)
+ {
++	obj->extra = NULL;	/* zcache_pampd_replace_in_obj() may set it later */
+ }
+ 
+-static int zcache_pampd_replace_in_obj(void *pampd, struct tmem_obj *obj)
++int zcache_pampd_replace_in_obj(void *new_pampd, struct tmem_obj *obj)
+ {
+-	return -1;
++	int ret = -1;	/* stays -1 when new_pampd is NULL */
++
++	if (new_pampd != NULL) {
++		if (obj->extra == NULL)
++			obj->extra = new_pampd;	/* first one seen for this obj */
++		/* enforce that all remote pages in an object reside
++		 * in the same node! */
++		else if (pampd_remote_node(new_pampd) !=
++				pampd_remote_node((void *)(obj->extra)))
++			BUG();
++		ret = 0;
++	}
++	return ret;
+ }
+ 
+-static bool zcache_pampd_is_remote(void *pampd)
++/*
++ * Called by the message handler after a (still compressed) page has been
++ * fetched from the remote machine in response to an "is_remote" tmem_get
++ * or persistent tmem_localify.  For a tmem_get, "extra" is the address of
++ * the page that is to be filled to successfully resolve the tmem_get; for
++ * a (persistent) tmem_localify, "extra" is NULL (as the data is placed only
++ * in the local zcache).  "data" points to "size" bytes of (compressed) data
++ * passed in the message.  In the case of a persistent remote get, if
++ * pre-allocation was successful (see zcache_repatriate_preload), the page
++ * is placed into both local zcache and at "extra".
++ */
++int zcache_localify(int pool_id, struct tmem_oid *oidp,
++			uint32_t index, char *data, size_t size,
++			void *extra)
+ {
+-	return 0;
++	int ret = -ENOENT;
++	unsigned long flags;
++	struct tmem_pool *pool;
++	bool ephemeral, delete = false;
++	size_t clen = PAGE_SIZE;	/* must still be PAGE_SIZE after decompress */
++	void *pampd, *saved_hb;
++	struct tmem_obj *obj;
++
++	pool = zcache_get_pool_by_id(LOCAL_CLIENT, pool_id);
++	if (unlikely(pool == NULL))
++		/* pool doesn't exist anymore */
++		goto out;
++	ephemeral = is_ephemeral(pool);
++	local_irq_save(flags);  /* FIXME: maybe only disable softirqs? */
++	pampd = tmem_localify_get_pampd(pool, oidp, index, &obj, &saved_hb);
++	if (pampd == NULL) {
++		/* hmmm... must have been a flush while waiting */
++#if 1
++		pr_err("UNTESTED pampd==NULL in zcache_localify\n");
++#endif
++		if (ephemeral)
++			ramster_remote_eph_pages_unsucc_get++;
++		else
++			ramster_remote_pers_pages_unsucc_get++;
++		obj = NULL;
++		goto finish;
++	} else if (unlikely(!pampd_is_remote(pampd))) {
++		/* hmmm... must have been a dup put while waiting */
++#if 1
++		pr_err("UNTESTED dup while waiting in zcache_localify\n");
++#endif
++		if (ephemeral)
++			ramster_remote_eph_pages_unsucc_get++;
++		else
++			ramster_remote_pers_pages_unsucc_get++;
++		obj = NULL;
++		pampd = NULL;
++		ret = -EEXIST;
++		goto finish;
++	} else if (size == 0) {
++		/* no remote data, delete the local is_remote pampd */
++		pampd = NULL;
++		if (ephemeral)
++			ramster_remote_eph_pages_unsucc_get++;
++		else
++			BUG();
++		delete = true;
++		goto finish;
++	}
++	if (!ephemeral && pampd_is_intransit(pampd)) {
++		/* localify to zcache */
++		pampd = pampd_mask_intransit_and_remote(pampd);
++		zv_copy_to_pampd(pampd, data, size);
++	} else {
++		pampd = NULL;
++		obj = NULL;
++	}
++	if (extra != NULL) {
++		/* decompress direct-to-memory to complete remotify */
++		ret = lzo1x_decompress_safe((char *)data, size,
++						(char *)extra, &clen);
++		BUG_ON(ret != LZO_E_OK);
++		BUG_ON(clen != PAGE_SIZE);
++	}
++	if (ephemeral)
++		ramster_remote_eph_pages_succ_get++;
++	else
++		ramster_remote_pers_pages_succ_get++;
++	ret = 0;
++finish:
++	tmem_localify_finish(obj, index, pampd, saved_hb, delete);
++	zcache_put_pool(pool);
++	local_irq_restore(flags);
++out:
++	return ret;
++}
++
++/*
++ * Called on a remote persistent tmem_get to try to preallocate local
++ * storage for the data held in the remote persistent page.  On success
++ * the new pampd is returned, marked as remote and in_transit; otherwise
++ * NULL is returned.  *intransit reports whether a repatriation of this
++ * pampd was already underway.  Note that the appropriate tmem data
++ * structure must be locked.
++ */
++static void *zcache_pampd_repatriate_preload(void *pampd,
++						struct tmem_pool *pool,
++						struct tmem_oid *oid,
++						uint32_t index,
++						bool *intransit)
++{
++	int clen = pampd_remote_size(pampd);
++	void *reserved;
++	unsigned long flags;
++
++	BUG_ON(!pampd_is_remote(pampd));
++	BUG_ON(is_ephemeral(pool));
++	/*
++	 * a pampd that is already being repatriated gets no second
++	 * preallocation, which avoids multiple allocations (and maybe
++	 * a memory leak)
++	 */
++	*intransit = pampd_is_intransit(pampd);
++	if (*intransit)
++		return NULL;
++#if 0
++{
++static unsigned long cnt;
++cnt++;
++if (!(cnt&(cnt-1)))
++pr_err("TESTING zcache_pampd_repat_preload, size=%d, cksum=??, cnt=%lu\n",
++clen, cnt);
++}
++#endif
++	/* zv_alloc insists on running with irqs disabled */
++	local_irq_save(flags);
++	reserved = (void *)zv_alloc(pool, oid, index, clen);
++	if (reserved == NULL) {
++		/* no local space could be reserved; just count it */
++		ramster_pers_pages_remote_nomem++;
++		local_irq_restore(flags);
++		return NULL;
++	}
++	/*
++	 *  remote + locally reserved space == intransit (this only
++	 *  happens for persistent pages, in which case the remote
++	 *  copy is freed)
++	 */
++	reserved = pampd_mark_intransit(reserved);
++	atomic_dec(&ramster_remote_pers_pages);
++	WARN_ON_ONCE(atomic_read(&ramster_remote_pers_pages) < 0);
++	local_irq_restore(flags);
++	return reserved;
++}
++
++/*
++ * Called on a remote tmem_get to send the message that fetches the page.
++ * Might sleep, so no tmem locks can be held.  "extra" is carried through the
++ * round-trip messaging to zcache_localify.  Returns the async get's result.
++ */
++static int zcache_pampd_repatriate(void *fake_pampd, void *real_pampd,
++				   struct tmem_pool *pool,
++				   struct tmem_oid *oid, uint32_t index,
++				   bool free, void *extra)
++{
++	struct tmem_xhandle xh;
++	int ret;
++
++	if (pampd_is_intransit(real_pampd))
++		/* have local space pre-reserved, so free remote copy */
++		free = true;
++	xh = tmem_xhandle_fill(LOCAL_CLIENT, pool, oid, index);
++	/* unreliable request/response for now */
++	ret = ramster_remote_async_get(&xh, free,
++					pampd_remote_node(fake_pampd),
++					pampd_remote_size(fake_pampd),
++					pampd_remote_cksum(fake_pampd),
++					extra);
++#if 1
++	if (ret != 0 && ret != -ENOENT)	/* -ENOENT is not logged */
++		pr_err("TESTING zcache_pampd_repatriate returns, ret=%d\n",
++			ret);
++#endif
++	return ret;
+ }
+ 
+ static struct tmem_pamops zcache_pamops = {
+ 	.create = zcache_pampd_create,
+ 	.get_data = zcache_pampd_get_data,
+-	.get_data_and_free = zcache_pampd_get_data_and_free,
+ 	.free = zcache_pampd_free,
++	.get_data_and_free = zcache_pampd_get_data_and_free,
+ 	.free_obj = zcache_pampd_free_obj,
++	.is_remote = zcache_pampd_is_remote,
++	.repatriate_preload = zcache_pampd_repatriate_preload, /* reserve space */
++	.repatriate = zcache_pampd_repatriate,	/* issue the async remote get */
+ 	.new_obj = zcache_pampd_new_obj,
+ 	.replace_in_obj = zcache_pampd_replace_in_obj,
+-	.is_remote = zcache_pampd_is_remote,
+ };
+ 
+ /*
+@@ -1342,9 +2293,13 @@ static int zcache_cpu_notifier(struct notifier_block *nb,
+ 		per_cpu(zcache_workmem, cpu) =
+ 			kzalloc(LZO1X_MEM_COMPRESS,
+ 				GFP_KERNEL | __GFP_REPEAT);
++		per_cpu(zcache_remoteputmem, cpu) =
++			kzalloc(PAGE_SIZE, GFP_KERNEL | __GFP_REPEAT);
+ 		break;
+ 	case CPU_DEAD:
+ 	case CPU_UP_CANCELED:
++		kfree(per_cpu(zcache_remoteputmem, cpu));
++		per_cpu(zcache_remoteputmem, cpu) = NULL;
+ 		free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
+ 				LZO_DSTMEM_PAGE_ORDER);
+ 		per_cpu(zcache_dstmem, cpu) = NULL;
+@@ -1427,6 +2382,7 @@ ZCACHE_SYSFS_RO(aborted_preload);
+ ZCACHE_SYSFS_RO(aborted_shrink);
+ ZCACHE_SYSFS_RO(compress_poor);
+ ZCACHE_SYSFS_RO(mean_compress_poor);
++ZCACHE_SYSFS_RO(policy_percent_exceeded);
+ ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages);
+ ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages);
+ ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count);
+@@ -1451,6 +2407,7 @@ static struct attribute *zcache_attrs[] = {
+ 	&zcache_flobj_found_attr.attr,
+ 	&zcache_failed_eph_puts_attr.attr,
+ 	&zcache_failed_pers_puts_attr.attr,
++	&zcache_policy_percent_exceeded_attr.attr,
+ 	&zcache_compress_poor_attr.attr,
+ 	&zcache_mean_compress_poor_attr.attr,
+ 	&zcache_zbud_curr_raw_pages_attr.attr,
+@@ -1483,6 +2440,151 @@ static struct attribute_group zcache_attr_group = {
+ 	.name = "zcache",
+ };
+ 
++#define RAMSTER_SYSFS_RO(_name) /* 0444 attr printing ramster_<name> as %lu */ \
++	static ssize_t ramster_##_name##_show(struct kobject *kobj, \
++				struct kobj_attribute *attr, char *buf) \
++	{ \
++		return sprintf(buf, "%lu\n", ramster_##_name); \
++	} \
++	static struct kobj_attribute ramster_##_name##_attr = { \
++		.attr = { .name = __stringify(_name), .mode = 0444 }, \
++		.show = ramster_##_name##_show, \
++	}
++
++#define RAMSTER_SYSFS_RW(_name) /* 0644 attr; store parses a base-10 ulong */ \
++	static ssize_t ramster_##_name##_show(struct kobject *kobj, \
++				struct kobj_attribute *attr, char *buf) \
++	{ \
++		return sprintf(buf, "%lu\n", ramster_##_name); \
++	} \
++	static ssize_t ramster_##_name##_store(struct kobject *kobj, \
++		struct kobj_attribute *attr, const char *buf, size_t count) \
++	{ \
++		int err; \
++		unsigned long enable; \
++		err = strict_strtoul(buf, 10, &enable); \
++		if (err) /* any parse error is reported as -EINVAL */ \
++			return -EINVAL; \
++		ramster_##_name = enable; \
++		return count; \
++	} \
++	static struct kobj_attribute ramster_##_name##_attr = { \
++		.attr = { .name = __stringify(_name), .mode = 0644 }, \
++		.show = ramster_##_name##_show, \
++		.store = ramster_##_name##_store, \
++	}
++
++#define RAMSTER_SYSFS_RO_ATOMIC(_name) /* 0444 attr for an atomic_t, as %d */ \
++	static ssize_t ramster_##_name##_show(struct kobject *kobj, \
++				struct kobj_attribute *attr, char *buf) \
++	{ \
++	    return sprintf(buf, "%d\n", atomic_read(&ramster_##_name)); \
++	} \
++	static struct kobj_attribute ramster_##_name##_attr = { \
++		.attr = { .name = __stringify(_name), .mode = 0444 }, \
++		.show = ramster_##_name##_show, \
++	}
++
++RAMSTER_SYSFS_RO_ATOMIC(remote_pers_pages);
++RAMSTER_SYSFS_RW(pers_remotify_enable);
++RAMSTER_SYSFS_RW(eph_remotify_enable);
++RAMSTER_SYSFS_RO(eph_pages_remoted);
++RAMSTER_SYSFS_RO(eph_pages_remote_failed);
++RAMSTER_SYSFS_RO(pers_pages_remoted);
++RAMSTER_SYSFS_RO(pers_pages_remote_failed);
++RAMSTER_SYSFS_RO(pers_pages_remote_nomem);
++RAMSTER_SYSFS_RO(remote_pages_flushed);
++RAMSTER_SYSFS_RO(remote_page_flushes_failed);
++RAMSTER_SYSFS_RO(remote_objects_flushed);
++RAMSTER_SYSFS_RO(remote_object_flushes_failed);
++RAMSTER_SYSFS_RO(remote_eph_pages_succ_get);
++RAMSTER_SYSFS_RO(remote_eph_pages_unsucc_get);
++RAMSTER_SYSFS_RO(remote_pers_pages_succ_get);
++RAMSTER_SYSFS_RO(remote_pers_pages_unsucc_get);
++RAMSTER_SYSFS_RO_ATOMIC(foreign_eph_pampd_count);
++RAMSTER_SYSFS_RO(foreign_eph_pampd_count_max);
++RAMSTER_SYSFS_RO_ATOMIC(foreign_pers_pampd_count);
++RAMSTER_SYSFS_RO(foreign_pers_pampd_count_max);
++RAMSTER_SYSFS_RO_ATOMIC(curr_flnode_count);
++RAMSTER_SYSFS_RO(curr_flnode_count_max);
++
++#define MANUAL_NODES 8
++static bool ramster_nodes_manual_up[MANUAL_NODES];
++static ssize_t ramster_manual_node_up_show(struct kobject *kobj,
++				struct kobj_attribute *attr, char *buf)
++{
++	ssize_t len = 0;
++	int node;
++
++	for (node = 0; node < MANUAL_NODES; node++)
++		if (ramster_nodes_manual_up[node])
++			len += sprintf(buf + len, "%d ", node);
++	return len + sprintf(buf + len, "\n");
++}
++
++static ssize_t ramster_manual_node_up_store(struct kobject *kobj,
++		struct kobj_attribute *attr, const char *buf, size_t count)
++{
++	extern void o2net_hb_node_up_manual(int);
++	unsigned long node;
++
++	/* input must be a decimal node index below MANUAL_NODES */
++	if (strict_strtoul(buf, 10, &node) != 0) {
++		pr_err("bad strtoul?\n");
++		return -EINVAL;
++	}
++	if (node >= MANUAL_NODES) {
++		pr_err("bad node_num=%lu?\n", node);
++		return -EINVAL;
++	}
++	/* a repeated request is logged and otherwise ignored */
++	if (ramster_nodes_manual_up[node]) {
++		pr_err("node %d already up, ignoring\n", (int)node);
++		return count;
++	}
++	ramster_nodes_manual_up[node] = true;
++	o2net_hb_node_up_manual((int)node);
++	return count;
++}
++
++static struct kobj_attribute ramster_manual_node_up_attr = {
++	.attr = { .name = "manual_node_up", .mode = 0644 },
++	.show = ramster_manual_node_up_show,
++	.store = ramster_manual_node_up_store,
++};
++
++static struct attribute *ramster_attrs[] = {
++	&ramster_pers_remotify_enable_attr.attr,
++	&ramster_eph_remotify_enable_attr.attr,
++	&ramster_remote_pers_pages_attr.attr,
++	&ramster_eph_pages_remoted_attr.attr,
++	&ramster_eph_pages_remote_failed_attr.attr,
++	&ramster_pers_pages_remoted_attr.attr,
++	&ramster_pers_pages_remote_failed_attr.attr,
++	&ramster_pers_pages_remote_nomem_attr.attr,
++	&ramster_remote_pages_flushed_attr.attr,
++	&ramster_remote_page_flushes_failed_attr.attr,
++	&ramster_remote_objects_flushed_attr.attr,
++	&ramster_remote_object_flushes_failed_attr.attr,
++	&ramster_remote_eph_pages_succ_get_attr.attr,
++	&ramster_remote_eph_pages_unsucc_get_attr.attr,
++	&ramster_remote_pers_pages_succ_get_attr.attr,
++	&ramster_remote_pers_pages_unsucc_get_attr.attr,
++	&ramster_foreign_eph_pampd_count_attr.attr,
++	&ramster_foreign_eph_pampd_count_max_attr.attr,
++	&ramster_foreign_pers_pampd_count_attr.attr,
++	&ramster_foreign_pers_pampd_count_max_attr.attr,
++	&ramster_curr_flnode_count_attr.attr,
++	&ramster_curr_flnode_count_max_attr.attr,
++	&ramster_manual_node_up_attr.attr,
++	NULL,
++};
++
++static struct attribute_group ramster_attr_group = {
++	.attrs = ramster_attrs,
++	.name = "ramster",
++};
++
+ #endif /* CONFIG_SYSFS */
+ /*
+  * When zcache is disabled ("frozen"), pools can be created and destroyed,
+@@ -1527,8 +2629,9 @@ static struct shrinker zcache_shrinker = {
+  * zcache shims between cleancache/frontswap ops and tmem
+  */
+ 
+-static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
+-				uint32_t index, struct page *page)
++int zcache_put(int cli_id, int pool_id, struct tmem_oid *oidp,
++			uint32_t index, char *data, size_t size,
++			bool raw, int ephemeral)
+ {
+ 	struct tmem_pool *pool;
+ 	int ret = -1;
+@@ -1539,8 +2642,7 @@ static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
+ 		goto out;
+ 	if (!zcache_freeze && zcache_do_preload(pool) == 0) {
+ 		/* preload does preempt_disable on success */
+-		ret = tmem_put(pool, oidp, index, (char *)(page),
+-				PAGE_SIZE, 0, is_ephemeral(pool));
++		ret = tmem_put(pool, oidp, index, data, size, raw, ephemeral);
+ 		if (ret < 0) {
+ 			if (is_ephemeral(pool))
+ 				zcache_failed_eph_puts++;
+@@ -1560,27 +2662,40 @@ out:
+ 	return ret;
+ }
+ 
+-static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
+-				uint32_t index, struct page *page)
++/* Returns tmem_get()'s result, or -1 if the pool is missing or empty. */
++int zcache_get(int cli_id, int pool_id, struct tmem_oid *oidp,
++			uint32_t index, char *data, size_t *sizep,
++			bool raw, int get_and_free)
+ {
+ 	struct tmem_pool *pool;
+ 	int ret = -1;
+-	unsigned long flags;
+-	size_t size = PAGE_SIZE;
++	bool eph;
+ 
+-	local_irq_save(flags);
++	if (!raw) {
++		BUG_ON(irqs_disabled());
++		BUG_ON(in_softirq());
++	}
+ 	pool = zcache_get_pool_by_id(cli_id, pool_id);
++	eph = (pool == NULL) || is_ephemeral(pool); /* pool may be NULL */
+ 	if (likely(pool != NULL)) {
+ 		if (atomic_read(&pool->obj_count) > 0)
+-			ret = tmem_get(pool, oidp, index, (char *)(page),
+-					&size, 0, is_ephemeral(pool));
++			ret = tmem_get(pool, oidp, index, data, sizep,
++					raw, get_and_free);
+ 		zcache_put_pool(pool);
+ 	}
+-	local_irq_restore(flags);
++	WARN_ONCE((!eph && (ret != 0)), "zcache_get fails on persistent pool, "
++			  "bad things are very likely to happen soon\n");
++#if 1
++{
++if (ret != 0 && ret != -1 && !(ret == -EINVAL && eph))
++pr_err("TESTING zcache_get tmem_get returns ret=%d\n", ret);
++}
++#endif
++	BUG_ON(ret == -EAGAIN); /* FIXME... still needed? let's ensure */
+ 	return ret;
+ }
+ 
+-static int zcache_flush_page(int cli_id, int pool_id,
++int zcache_flush(int cli_id, int pool_id,
+ 				struct tmem_oid *oidp, uint32_t index)
+ {
+ 	struct tmem_pool *pool;
+@@ -1590,6 +2705,7 @@ static int zcache_flush_page(int cli_id, int pool_id,
+ 	local_irq_save(flags);
+ 	zcache_flush_total++;
+ 	pool = zcache_get_pool_by_id(cli_id, pool_id);
++	ramster_do_preload_flnode_only(pool);
+ 	if (likely(pool != NULL)) {
+ 		if (atomic_read(&pool->obj_count) > 0)
+ 			ret = tmem_flush_page(pool, oidp, index);
+@@ -1601,8 +2717,7 @@ static int zcache_flush_page(int cli_id, int pool_id,
+ 	return ret;
+ }
+ 
+-static int zcache_flush_object(int cli_id, int pool_id,
+-				struct tmem_oid *oidp)
++int zcache_flush_object(int cli_id, int pool_id, struct tmem_oid *oidp)
+ {
+ 	struct tmem_pool *pool;
+ 	int ret = -1;
+@@ -1611,6 +2726,7 @@ static int zcache_flush_object(int cli_id, int pool_id,
+ 	local_irq_save(flags);
+ 	zcache_flobj_total++;
+ 	pool = zcache_get_pool_by_id(cli_id, pool_id);
++	ramster_do_preload_flnode_only(pool);
+ 	if (likely(pool != NULL)) {
+ 		if (atomic_read(&pool->obj_count) > 0)
+ 			ret = tmem_flush_object(pool, oidp);
+@@ -1622,7 +2738,7 @@ static int zcache_flush_object(int cli_id, int pool_id,
+ 	return ret;
+ }
+ 
+-static int zcache_destroy_pool(int cli_id, int pool_id)
++int zcache_client_destroy_pool(int cli_id, int pool_id)
+ {
+ 	struct tmem_pool *pool = NULL;
+ 	struct zcache_client *cli = NULL;
+@@ -1649,13 +2765,17 @@ static int zcache_destroy_pool(int cli_id, int pool_id)
+ 	ret = tmem_destroy_pool(pool);
+ 	local_bh_enable();
+ 	kfree(pool);
+-	pr_info("zcache: destroyed pool id=%d, cli_id=%d\n",
+-			pool_id, cli_id);
++	pr_info("ramster: destroyed pool id=%d cli_id=%d\n", pool_id, cli_id);
+ out:
+ 	return ret;
+ }
+ 
+-static int zcache_new_pool(uint16_t cli_id, uint32_t flags)
++static int zcache_destroy_pool(int pool_id)
++{
++	return zcache_client_destroy_pool(LOCAL_CLIENT, pool_id);
++}
++
++int zcache_new_pool(uint16_t cli_id, uint32_t flags)
+ {
+ 	int poolid = -1;
+ 	struct tmem_pool *pool;
+@@ -1670,7 +2790,7 @@ static int zcache_new_pool(uint16_t cli_id, uint32_t flags)
+ 	atomic_inc(&cli->refcount);
+ 	pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);
+ 	if (pool == NULL) {
+-		pr_info("zcache: pool creation failed: out of memory\n");
++		pr_info("ramster: pool creation failed: out of memory\n");
+ 		goto out;
+ 	}
+ 
+@@ -1678,7 +2798,7 @@ static int zcache_new_pool(uint16_t cli_id, uint32_t flags)
+ 		if (cli->tmem_pools[poolid] == NULL)
+ 			break;
+ 	if (poolid >= MAX_POOLS_PER_CLIENT) {
+-		pr_info("zcache: pool creation failed: max exceeded\n");
++		pr_info("ramster: pool creation failed: max exceeded\n");
+ 		kfree(pool);
+ 		poolid = -1;
+ 		goto out;
+@@ -1688,7 +2808,7 @@ static int zcache_new_pool(uint16_t cli_id, uint32_t flags)
+ 	pool->pool_id = poolid;
+ 	tmem_new_pool(pool, flags);
+ 	cli->tmem_pools[poolid] = pool;
+-	pr_info("zcache: created %s tmem pool, id=%d, client=%d\n",
++	pr_info("ramster: created %s tmem pool, id=%d, client=%d\n",
+ 		flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
+ 		poolid, cli_id);
+ out:
+@@ -1697,6 +2817,64 @@ out:
+ 	return poolid;
+ }
+ 
++static int zcache_local_new_pool(uint32_t flags)
++{
++	return zcache_new_pool(LOCAL_CLIENT, flags);
++}
++
++/*
++ * Make sure pool pool_id exists for remote client cli_id, creating client
++ * and pool on demand; 0 on success, -1 on mismatch or allocation failure.
++ */
++int zcache_autocreate_pool(int cli_id, int pool_id, bool ephemeral)
++{
++	struct tmem_pool *pool;
++	struct zcache_client *cli = NULL;
++	uint32_t flags = ephemeral ? 0 : TMEM_POOL_PERSIST;
++	int ret = -1;
++
++	if (cli_id == LOCAL_CLIENT || (unsigned int)cli_id >= MAX_CLIENTS ||
++	    (unsigned int)pool_id >= MAX_POOLS_PER_CLIENT)
++		goto out; /* cli stays NULL, so this ends in BUG() below */
++	cli = &zcache_clients[cli_id];
++	if ((ephemeral && !use_cleancache) || (!ephemeral && !use_frontswap))
++		BUG(); /* FIXME, handle more gracefully later */
++	if (!cli->allocated) {
++		if (zcache_new_client(cli_id))
++			BUG(); /* FIXME, handle more gracefully later */
++		cli = &zcache_clients[cli_id];
++	}
++	atomic_inc(&cli->refcount);
++	pool = cli->tmem_pools[pool_id];
++	if (pool != NULL) {
++		if (pool->persistent && ephemeral)
++			pr_err("zcache_autocreate_pool: type mismatch\n");
++		else
++			ret = 0;
++		goto out;
++	}
++	pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);
++	if (pool == NULL) {
++		pr_info("ramster: pool creation failed: out of memory\n");
++		goto out;
++	}
++	atomic_set(&pool->refcount, 0);
++	pool->client = cli;
++	pool->pool_id = pool_id;
++	tmem_new_pool(pool, flags);
++	cli->tmem_pools[pool_id] = pool;
++	pr_info("ramster: AUTOcreated %s tmem poolid=%d, for remote client=%d\n",
++		flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
++		pool_id, cli_id);
++	ret = 0;
++out:
++	if (cli == NULL)
++		BUG(); /* FIXME, handle more gracefully later */
++	if (cli != NULL)
++		atomic_dec(&cli->refcount);
++	return ret;
++}
++
+ /**********
+  * Two kernel functionalities currently can be layered on top of tmem.
+  * These are "cleancache" which is used as a second-chance cache for clean
+@@ -1713,8 +2891,12 @@ static void zcache_cleancache_put_page(int pool_id,
+ 	u32 ind = (u32) index;
+ 	struct tmem_oid oid = *(struct tmem_oid *)&key;
+ 
+-	if (likely(ind == index))
+-		(void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, page);
++	if (likely(ind == index)) {
++		char *kva = page_address(page);
++
++		(void)zcache_put(LOCAL_CLIENT, pool_id, &oid, index,
++			kva, PAGE_SIZE, 0, 1);
++	}
+ }
+ 
+ static int zcache_cleancache_get_page(int pool_id,
+@@ -1725,8 +2907,15 @@ static int zcache_cleancache_get_page(int pool_id,
+ 	struct tmem_oid oid = *(struct tmem_oid *)&key;
+ 	int ret = -1;
+ 
+-	if (likely(ind == index))
+-		ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, page);
++	preempt_disable();
++	if (likely(ind == index)) {
++		char *kva = page_address(page);
++		size_t size = PAGE_SIZE;
++
++		ret = zcache_get(LOCAL_CLIENT, pool_id, &oid, index,
++			kva, &size, 0, 0);
++	}
++	preempt_enable();
+ 	return ret;
+ }
+ 
+@@ -1738,7 +2927,7 @@ static void zcache_cleancache_flush_page(int pool_id,
+ 	struct tmem_oid oid = *(struct tmem_oid *)&key;
+ 
+ 	if (likely(ind == index))
+-		(void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);
++		(void)zcache_flush(LOCAL_CLIENT, pool_id, &oid, ind);
+ }
+ 
+ static void zcache_cleancache_flush_inode(int pool_id,
+@@ -1752,7 +2941,7 @@ static void zcache_cleancache_flush_inode(int pool_id,
+ static void zcache_cleancache_flush_fs(int pool_id)
+ {
+ 	if (pool_id >= 0)
+-		(void)zcache_destroy_pool(LOCAL_CLIENT, pool_id);
++		(void)zcache_destroy_pool(pool_id);
+ }
+ 
+ static int zcache_cleancache_init_fs(size_t pagesize)
+@@ -1760,7 +2949,7 @@ static int zcache_cleancache_init_fs(size_t pagesize)
+ 	BUG_ON(sizeof(struct cleancache_filekey) !=
+ 				sizeof(struct tmem_oid));
+ 	BUG_ON(pagesize != PAGE_SIZE);
+-	return zcache_new_pool(LOCAL_CLIENT, 0);
++	return zcache_local_new_pool(0);
+ }
+ 
+ static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
+@@ -1769,7 +2958,7 @@ static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
+ 	BUG_ON(sizeof(struct cleancache_filekey) !=
+ 				sizeof(struct tmem_oid));
+ 	BUG_ON(pagesize != PAGE_SIZE);
+-	return zcache_new_pool(LOCAL_CLIENT, 0);
++	return zcache_local_new_pool(0);
+ }
+ 
+ static struct cleancache_ops zcache_cleancache_ops = {
+@@ -1799,7 +2988,7 @@ static int zcache_frontswap_poolid = -1;
+  * Swizzling increases objects per swaptype, increasing tmem concurrency
+  * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
+  */
+-#define SWIZ_BITS		4
++#define SWIZ_BITS		8
+ #define SWIZ_MASK		((1 << SWIZ_BITS) - 1)
+ #define _oswiz(_type, _ind)	((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
+ #define iswiz(_ind)		(_ind >> SWIZ_BITS)
+@@ -1819,12 +3008,14 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
+ 	struct tmem_oid oid = oswiz(type, ind);
+ 	int ret = -1;
+ 	unsigned long flags;
++	char *kva;
+ 
+ 	BUG_ON(!PageLocked(page));
+ 	if (likely(ind64 == ind)) {
+ 		local_irq_save(flags);
+-		ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,
+-					&oid, iswiz(ind), page);
++		kva = page_address(page);
++		ret = zcache_put(LOCAL_CLIENT, zcache_frontswap_poolid,
++				&oid, iswiz(ind), kva, PAGE_SIZE, 0, 0);
+ 		local_irq_restore(flags);
+ 	}
+ 	return ret;
+@@ -1840,10 +3031,16 @@ static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
+ 	struct tmem_oid oid = oswiz(type, ind);
+ 	int ret = -1;
+ 
++	preempt_disable(); /* FIXME, remove this? */
+ 	BUG_ON(!PageLocked(page));
+-	if (likely(ind64 == ind))
+-		ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,
+-					&oid, iswiz(ind), page);
++	if (likely(ind64 == ind)) {
++		char *kva = page_address(page);
++		size_t size = PAGE_SIZE;
++
++		ret = zcache_get(LOCAL_CLIENT, zcache_frontswap_poolid,
++					&oid, iswiz(ind), kva, &size, 0, -1);
++	}
++	preempt_enable(); /* FIXME, remove this? */
+ 	return ret;
+ }
+ 
+@@ -1855,7 +3052,7 @@ static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
+ 	struct tmem_oid oid = oswiz(type, ind);
+ 
+ 	if (likely(ind64 == ind))
+-		(void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,
++		(void)zcache_flush(LOCAL_CLIENT, zcache_frontswap_poolid,
+ 					&oid, iswiz(ind));
+ }
+ 
+@@ -1877,7 +3074,7 @@ static void zcache_frontswap_init(unsigned ignored)
+ 	/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
+ 	if (zcache_frontswap_poolid < 0)
+ 		zcache_frontswap_poolid =
+-			zcache_new_pool(LOCAL_CLIENT, TMEM_POOL_PERSIST);
++				zcache_local_new_pool(TMEM_POOL_PERSIST);
+ }
+ 
+ static struct frontswap_ops zcache_frontswap_ops = {
+@@ -1898,19 +3095,125 @@ struct frontswap_ops zcache_frontswap_register_ops(void)
+ #endif
+ 
+ /*
++ * frontswap selfshrinking
++ */
++
++#ifdef CONFIG_FRONTSWAP
++/* Interval in seconds (multiplied by HZ when scheduled) between worker runs. */
++static unsigned int selfshrink_interval __read_mostly = 5;
++
++static void selfshrink_process(struct work_struct *work);
++static DECLARE_DELAYED_WORK(selfshrink_worker, selfshrink_process);
++
++/* Enable/disable with sysfs. */
++static bool frontswap_selfshrinking __read_mostly;
++
++/* Enable/disable with kernel boot option. */
++static bool use_frontswap_selfshrink __initdata = true;
++
++/*
++ * The default values for the following parameters were deemed reasonable
++ * by experimentation, may be workload-dependent, and can all be
++ * adjusted via sysfs.
++ */
++
++/* Control rate for frontswap shrinking. Higher hysteresis is slower. */
++static unsigned int frontswap_hysteresis __read_mostly = 20;
++
++/*
++ * Number of selfshrink worker invocations to wait before observing that
++ * frontswap selfshrinking should commence. Note that selfshrinking does
++ * not use a separate worker thread.
++ */
++static unsigned int frontswap_inertia __read_mostly = 3;
++
++/* Countdown to next invocation of frontswap_shrink() */
++static unsigned long frontswap_inertia_counter;
++
++/*
++ * Invoked by the selfshrink worker thread, uses current number of pages
++ * in frontswap (frontswap_curr_pages()), previous status, and control
++ * values (hysteresis and inertia) to determine if frontswap should be
++ * shrunk and what the new frontswap size should be.  Note that
++ * frontswap_shrink is essentially a partial swapoff that immediately
++ * transfers pages from the "swap device" (frontswap) back into kernel
++ * RAM; despite the name, frontswap "shrinking" is very different from
++ * the "shrinker" interface used by the kernel MM subsystem to reclaim
++ * memory.
++ */
++static void frontswap_selfshrink(void)
++{
++	static unsigned long pages_now;
++	static unsigned long pages_before;
++	unsigned long target = 0;
++
++	pages_before = pages_now;
++	pages_now = frontswap_curr_pages();
++	/* empty or still growing: re-arm the inertia countdown and wait */
++	if (pages_now == 0 || pages_now > pages_before) {
++		frontswap_inertia_counter = frontswap_inertia;
++		return;
++	}
++	if (frontswap_inertia_counter != 0) {
++		frontswap_inertia_counter--;
++		if (frontswap_inertia_counter != 0)
++			return;
++	}
++	if (pages_now > frontswap_hysteresis)
++		target = pages_now - pages_now / frontswap_hysteresis;
++	frontswap_shrink(target);
++}
++
++static int __init ramster_nofrontswap_selfshrink_setup(char *s)
++{
++	use_frontswap_selfshrink = false;
++	return 1;
++}
++
++__setup("noselfshrink", ramster_nofrontswap_selfshrink_setup);
++
++static void selfshrink_process(struct work_struct *work)
++{
++	/* stop rescheduling once selfshrinking or frontswap is off */
++	if (!frontswap_selfshrinking || !frontswap_enabled)
++		return;
++	frontswap_selfshrink();
++	schedule_delayed_work(&selfshrink_worker, selfshrink_interval * HZ);
++}
++
++static int ramster_enabled;
++
++static int __init ramster_selfshrink_init(void)
++{
++	frontswap_selfshrinking = ramster_enabled && use_frontswap_selfshrink;
++	/* nothing to start unless ramster and selfshrinking are both on */
++	if (!frontswap_selfshrinking)
++		return -ENODEV;
++
++	pr_info("ramster: Initializing frontswap "
++				"selfshrinking driver.\n");
++	schedule_delayed_work(&selfshrink_worker, selfshrink_interval * HZ);
++
++	return 0;
++}
++
++subsys_initcall(ramster_selfshrink_init);
++#endif
++
++/*
+  * zcache initialization
+- * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR
++ * NOTE FOR NOW ramster MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR
+  * NOTHING HAPPENS!
+  */
+ 
+-static int zcache_enabled;
++static int ramster_enabled;
+ 
+-static int __init enable_zcache(char *s)
++static int __init enable_ramster(char *s)
+ {
+-	zcache_enabled = 1;
++	ramster_enabled = 1;
+ 	return 1;
+ }
+-__setup("zcache", enable_zcache);
++__setup("ramster", enable_ramster);
+ 
+ /* allow independent dynamic disabling of cleancache and frontswap */
+ 
+@@ -1918,16 +3221,22 @@ static int use_cleancache = 1;
+ 
+ static int __init no_cleancache(char *s)
+ {
++	pr_info("INIT no_cleancache called\n");
+ 	use_cleancache = 0;
+ 	return 1;
+ }
+ 
+-__setup("nocleancache", no_cleancache);
++/*
++ * FIXME: need to guarantee this gets checked before zcache_init is called
++ * What is the correct way to achieve this?
++ */
++early_param("nocleancache", no_cleancache);
+ 
+ static int use_frontswap = 1;
+ 
+ static int __init no_frontswap(char *s)
+ {
++	pr_info("INIT no_frontswap called\n");
+ 	use_frontswap = 0;
+ 	return 1;
+ }
+@@ -1940,20 +3249,22 @@ static int __init zcache_init(void)
+ 
+ #ifdef CONFIG_SYSFS
+ 	ret = sysfs_create_group(mm_kobj, &zcache_attr_group);
++	ret = sysfs_create_group(mm_kobj, &ramster_attr_group);
+ 	if (ret) {
+-		pr_err("zcache: can't create sysfs\n");
++		pr_err("ramster: can't create sysfs\n");
+ 		goto out;
+ 	}
+ #endif /* CONFIG_SYSFS */
+ #if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP)
+-	if (zcache_enabled) {
++	if (ramster_enabled) {
+ 		unsigned int cpu;
+ 
++		(void)ramster_o2net_register_handlers();
+ 		tmem_register_hostops(&zcache_hostops);
+ 		tmem_register_pamops(&zcache_pamops);
+ 		ret = register_cpu_notifier(&zcache_cpu_notifier_block);
+ 		if (ret) {
+-			pr_err("zcache: can't register cpu notifier\n");
++			pr_err("ramster: can't register cpu notifier\n");
+ 			goto out;
+ 		}
+ 		for_each_online_cpu(cpu) {
+@@ -1966,35 +3277,39 @@ static int __init zcache_init(void)
+ 				sizeof(struct tmem_objnode), 0, 0, NULL);
+ 	zcache_obj_cache = kmem_cache_create("zcache_obj",
+ 				sizeof(struct tmem_obj), 0, 0, NULL);
+-	ret = zcache_new_client(LOCAL_CLIENT);
+-	if (ret) {
+-		pr_err("zcache: can't create client\n");
+-		goto out;
+-	}
++	ramster_flnode_cache = kmem_cache_create("ramster_flnode",
++				sizeof(struct flushlist_node), 0, 0, NULL);
+ #endif
+ #ifdef CONFIG_CLEANCACHE
+-	if (zcache_enabled && use_cleancache) {
++	pr_info("INIT ramster_enabled=%d use_cleancache=%d\n",
++					ramster_enabled, use_cleancache);
++	if (ramster_enabled && use_cleancache) {
+ 		struct cleancache_ops old_ops;
+ 
+ 		zbud_init();
+ 		register_shrinker(&zcache_shrinker);
+ 		old_ops = zcache_cleancache_register_ops();
+-		pr_info("zcache: cleancache enabled using kernel "
++		pr_info("ramster: cleancache enabled using kernel "
+ 			"transcendent memory and compression buddies\n");
+ 		if (old_ops.init_fs != NULL)
+-			pr_warning("zcache: cleancache_ops overridden");
++			pr_warning("ramster: cleancache_ops overridden");
+ 	}
+ #endif
+ #ifdef CONFIG_FRONTSWAP
+-	if (zcache_enabled && use_frontswap) {
++	pr_info("INIT ramster_enabled=%d use_frontswap=%d\n",
++					ramster_enabled, use_frontswap);
++	if (ramster_enabled && use_frontswap) {
+ 		struct frontswap_ops old_ops;
+ 
++		zcache_new_client(LOCAL_CLIENT);
+ 		old_ops = zcache_frontswap_register_ops();
+-		pr_info("zcache: frontswap enabled using kernel "
++		pr_info("ramster: frontswap enabled using kernel "
+ 			"transcendent memory and xvmalloc\n");
+ 		if (old_ops.init != NULL)
+ 			pr_warning("ktmem: frontswap_ops overridden");
+ 	}
++	if (ramster_enabled && (use_frontswap || use_cleancache))
++		ramster_remotify_init();
+ #endif
+ out:
+ 	return ret;
+-- 
+1.7.1
diff --git a/a/content_digest b/N1/content_digest
index 2f50d01..2eba9f6 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -13,5 +13,2671 @@
  " dan.magenheimer@oracle.com\0"
  "\00:1\0"
  "b\0"
+ ">From d23d411ef33f094c14855d48962d44aec854c500 Mon Sep 17 00:00:00 2001\n"
+ "From: Dan Magenheimer <dan.magenheimer@oracle.com>\n"
+ "Date: Wed, 21 Dec 2011 14:01:59 -0700\n"
+ "Subject: [PATCH v2 4/6] drivers/staging/ramster: ramster-specific changes to zcache/tmem\n"
+ "\n"
+ "In tmem.[ch], new \"repatriate\" (provoke async get) and \"localify\" (handle\n"
+ "incoming data resulting from an async get) routines combine with a handful\n"
+ "of changes to existing pamops interfaces to allow the generic tmem code\n"
+ "to support asynchronous operations.  Also, a new tmem_xhandle struct\n"
+ "groups together key information that must be passed to remote tmem stores.\n"
+ "\n"
+ "Zcache-main.c is augmented with a large amount of ramster-specific code\n"
+ "to handle remote operations and \"foreign\" pages on both ends of the\n"
+ "\"remotify\" protocol.  New \"foreign\" pools are auto-created on demand.\n"
+ "A \"selfshrinker\" thread periodically repatriates remote persistent pages\n"
+ "when local memory conditions allow.  For certain operations, a queue is\n"
+ "necessary to guarantee strict ordering as out-of-order puts/flushes can\n"
+ "cause strange race conditions.  Pampd pointers now either point to local\n"
+ "memory OR describe a remote page; to allow the same 64-bits to describe\n"
+ "either, the LSB is used to differentiate.  Some acrobatics must be performed\n"
+ "to ensure local memory is available to handle a remote persistent get,\n"
+ "or deal with the data directly anyway if the malloc failed.  Lots\n"
+ "of ramster-specific statistics are available via sysfs.\n"
+ "\n"
+ "Note: Some debug ifdefs left in for now.\n"
+ "\n"
+ "Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>\n"
+ "\n"
+ "---\n"
+ "\n"
+ " drivers/staging/ramster/Kconfig       |   17 +-\n"
+ " drivers/staging/ramster/Makefile      |    5 +-\n"
+ " drivers/staging/ramster/tmem.c        |  117 ++-\n"
+ " drivers/staging/ramster/tmem.h        |   46 +-\n"
+ " drivers/staging/ramster/zcache-main.c | 1651 +++++++++++++++++++++++++++++----\n"
+ " 5 files changed, 1636 insertions(+), 200 deletions(-)\n"
+ "\n"
+ "diff --git a/drivers/staging/ramster/Kconfig b/drivers/staging/ramster/Kconfig\n"
+ "index 7fabcb2..5154693 100644\n"
+ "--- a/drivers/staging/ramster/Kconfig\n"
+ "+++ b/drivers/staging/ramster/Kconfig\n"
+ "@@ -1,13 +1,14 @@\n"
+ "-config ZCACHE\n"
+ "-\ttristate \"Dynamic compression of swap pages and clean pagecache pages\"\n"
+ "-\tdepends on CLEANCACHE || FRONTSWAP\n"
+ "+config RAMSTER\n"
+ "+\ttristate \"Cross-machine RAM capacity sharing, aka peer-to-peer tmem\"\n"
+ "+\tdepends on (CLEANCACHE || FRONTSWAP) && CONFIGFS_FS && !OCFS2_FS && !ZCACHE && !PREEMPT_VOLUNTARY && !HIGHMEM\n"
+ " \tselect XVMALLOC\n"
+ " \tselect LZO_COMPRESS\n"
+ " \tselect LZO_DECOMPRESS\n"
+ " \tdefault n\n"
+ " \thelp\n"
+ "-\t  Zcache doubles RAM efficiency while providing a significant\n"
+ "-\t  performance boosts on many workloads.  Zcache uses lzo1x\n"
+ "-\t  compression and an in-kernel implementation of transcendent\n"
+ "-\t  memory to store clean page cache pages and swap in RAM,\n"
+ "-\t  providing a noticeable reduction in disk I/O.\n"
+ "+\t  RAMster allows RAM on other machines in a cluster to be utilized\n"
+ "+\t  dynamically and symmetrically instead of swapping to a local swap\n"
+ "+\t  disk, thus improving performance on memory-constrained workloads\n"
+ "+\t  while minimizing total RAM across the cluster.  RAMster, like\n"
+ "+\t  zcache, compresses swap pages into local RAM, but then remotifies\n"
+ "+\t  the compressed pages to another node in the RAMster cluster.\n"
+ "diff --git a/drivers/staging/ramster/Makefile b/drivers/staging/ramster/Makefile\n"
+ "index 60daa27..e6c4a2e 100644\n"
+ "--- a/drivers/staging/ramster/Makefile\n"
+ "+++ b/drivers/staging/ramster/Makefile\n"
+ "@@ -1,3 +1,2 @@\n"
+ "-zcache-y\t:=\tzcache-main.o tmem.o\n"
+ "-\n"
+ "-obj-$(CONFIG_ZCACHE)\t+=\tzcache.o\n"
+ "+obj-$(CONFIG_RAMSTER)\t+=\tzcache-main.o tmem.o\n"
+ "+obj-$(CONFIG_RAMSTER)\t+=\tramster_o2net.o cluster/\n"
+ "diff --git a/drivers/staging/ramster/tmem.c b/drivers/staging/ramster/tmem.c\n"
+ "index 1ca66ea..ed7d07b 100644\n"
+ "--- a/drivers/staging/ramster/tmem.c\n"
+ "+++ b/drivers/staging/ramster/tmem.c\n"
+ "@@ -27,6 +27,7 @@\n"
+ " #include <linux/list.h>\n"
+ " #include <linux/spinlock.h>\n"
+ " #include <linux/atomic.h>\n"
+ "+#include <linux/delay.h>\n"
+ " \n"
+ " #include \"tmem.h\"\n"
+ " \n"
+ "@@ -316,7 +317,7 @@ static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)\n"
+ " }\n"
+ " \n"
+ " static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,\n"
+ "-\t\t\t\t\tvoid *new_pampd)\n"
+ "+\t\t\t\t\tvoid *new_pampd, bool no_free)\n"
+ " {\n"
+ " \tstruct tmem_objnode **slot;\n"
+ " \tvoid *ret = NULL;\n"
+ "@@ -325,7 +326,9 @@ static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,\n"
+ " \tif ((slot != NULL) && (*slot != NULL)) {\n"
+ " \t\tvoid *old_pampd = *(void **)slot;\n"
+ " \t\t*(void **)slot = new_pampd;\n"
+ "-\t\t(*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);\n"
+ "+\t\tif (!no_free)\n"
+ "+\t\t\t(*tmem_pamops.free)(old_pampd, obj->pool,\n"
+ "+\t\t\t\t\t\tNULL, 0, false);\n"
+ " \t\tret = new_pampd;\n"
+ " \t}\n"
+ " \treturn ret;\n"
+ "@@ -481,7 +484,7 @@ static void tmem_objnode_node_destroy(struct tmem_obj *obj,\n"
+ " \t\t\tif (ht == 1) {\n"
+ " \t\t\t\tobj->pampd_count--;\n"
+ " \t\t\t\t(*tmem_pamops.free)(objnode->slots[i],\n"
+ "-\t\t\t\t\t\tobj->pool, NULL, 0);\n"
+ "+\t\t\t\t\t\tobj->pool, NULL, 0, true);\n"
+ " \t\t\t\tobjnode->slots[i] = NULL;\n"
+ " \t\t\t\tcontinue;\n"
+ " \t\t\t}\n"
+ "@@ -498,7 +501,8 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)\n"
+ " \t\treturn;\n"
+ " \tif (obj->objnode_tree_height == 0) {\n"
+ " \t\tobj->pampd_count--;\n"
+ "-\t\t(*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);\n"
+ "+\t\t(*tmem_pamops.free)(obj->objnode_tree_root,\n"
+ "+\t\t\t\t\tobj->pool, NULL, 0, true);\n"
+ " \t} else {\n"
+ " \t\ttmem_objnode_node_destroy(obj, obj->objnode_tree_root,\n"
+ " \t\t\t\t\tobj->objnode_tree_height);\n"
+ "@@ -529,7 +533,7 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)\n"
+ "  * always flushes for simplicity.\n"
+ "  */\n"
+ " int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,\n"
+ "-\t\tchar *data, size_t size, bool raw, bool ephemeral)\n"
+ "+\t\tchar *data, size_t size, bool raw, int ephemeral)\n"
+ " {\n"
+ " \tstruct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;\n"
+ " \tvoid *pampd = NULL, *pampd_del = NULL;\n"
+ "@@ -545,7 +549,7 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,\n"
+ " \t\t\t/* if found, is a dup put, flush the old one */\n"
+ " \t\t\tpampd_del = tmem_pampd_delete_from_obj(obj, index);\n"
+ " \t\t\tBUG_ON(pampd_del != pampd);\n"
+ "-\t\t\t(*tmem_pamops.free)(pampd, pool, oidp, index);\n"
+ "+\t\t\t(*tmem_pamops.free)(pampd, pool, oidp, index, true);\n"
+ " \t\t\tif (obj->pampd_count == 0) {\n"
+ " \t\t\t\tobjnew = obj;\n"
+ " \t\t\t\tobjfound = NULL;\n"
+ "@@ -576,7 +580,7 @@ delete_and_free:\n"
+ " \t(void)tmem_pampd_delete_from_obj(obj, index);\n"
+ " free:\n"
+ " \tif (pampd)\n"
+ "-\t\t(*tmem_pamops.free)(pampd, pool, NULL, 0);\n"
+ "+\t\t(*tmem_pamops.free)(pampd, pool, NULL, 0, true);\n"
+ " \tif (objnew) {\n"
+ " \t\ttmem_obj_free(objnew, hb);\n"
+ " \t\t(*tmem_hostops.obj_free)(objnew, pool);\n"
+ "@@ -586,6 +590,65 @@ out:\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "+void *tmem_localify_get_pampd(struct tmem_pool *pool, struct tmem_oid *oidp,\n"
+ "+\t\t\t\tuint32_t index, struct tmem_obj **ret_obj,\n"
+ "+\t\t\t\tvoid **saved_hb)\n"
+ "+{\n"
+ "+\tstruct tmem_hashbucket *hb;\n"
+ "+\tstruct tmem_obj *obj = NULL;\n"
+ "+\tvoid *pampd = NULL;\n"
+ "+\n"
+ "+\thb = &pool->hashbucket[tmem_oid_hash(oidp)];\n"
+ "+\tspin_lock(&hb->lock);\n"
+ "+\tobj = tmem_obj_find(hb, oidp);\n"
+ "+\tif (likely(obj != NULL))\n"
+ "+\t\tpampd = tmem_pampd_lookup_in_obj(obj, index);\n"
+ "+\t*ret_obj = obj;\n"
+ "+\t*saved_hb = (void *)hb;\n"
+ "+\t/* note, hashbucket remains locked */\n"
+ "+\treturn pampd;\n"
+ "+}\n"
+ "+\n"
+ "+void tmem_localify_finish(struct tmem_obj *obj, uint32_t index,\n"
+ "+\t\t\t  void *pampd, void *saved_hb, bool delete)\n"
+ "+{\n"
+ "+\tstruct tmem_hashbucket *hb = (struct tmem_hashbucket *)saved_hb;\n"
+ "+\n"
+ "+\tBUG_ON(!spin_is_locked(&hb->lock));\n"
+ "+\tif (pampd != NULL) {\n"
+ "+\t\tBUG_ON(obj == NULL);\n"
+ "+\t\t(void)tmem_pampd_replace_in_obj(obj, index, pampd, 1);\n"
+ "+\t} else if (delete) {\n"
+ "+\t\tBUG_ON(obj == NULL);\n"
+ "+\t\t(void)tmem_pampd_delete_from_obj(obj, index);\n"
+ "+\t}\n"
+ "+\tspin_unlock(&hb->lock);\n"
+ "+}\n"
+ "+\n"
+ "+static int tmem_repatriate(void **ppampd, struct tmem_hashbucket *hb,\n"
+ "+\t\t\t\tstruct tmem_pool *pool, struct tmem_oid *oidp,\n"
+ "+\t\t\t\tuint32_t index, bool free, char *data)\n"
+ "+{\n"
+ "+\tvoid *old_pampd = *ppampd, *new_pampd = NULL;\n"
+ "+\tbool intransit = false;\n"
+ "+\tint ret = 0;\n"
+ "+\n"
+ "+\n"
+ "+\tif (!is_ephemeral(pool))\n"
+ "+\t\tnew_pampd = (*tmem_pamops.repatriate_preload)(\n"
+ "+\t\t\t\told_pampd, pool, oidp, index, &intransit);\n"
+ "+\tif (intransit)\n"
+ "+\t\tret = -EAGAIN;\n"
+ "+\telse if (new_pampd != NULL)\n"
+ "+\t\t*ppampd = new_pampd;\n"
+ "+\t/* must release the hb->lock else repatriate can't sleep */\n"
+ "+\tspin_unlock(&hb->lock);\n"
+ "+\tif (!intransit)\n"
+ "+\t\tret = (*tmem_pamops.repatriate)(old_pampd, new_pampd, pool,\n"
+ "+\t\t\t\t\t\toidp, index, free, data);\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ " /*\n"
+ "  * \"Get\" a page, e.g. if one can be found, copy the tmem page with the\n"
+ "  * matching handle from PAM space to the kernel.  By tmem definition,\n"
+ "@@ -607,14 +670,38 @@ int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,\n"
+ " \tint ret = -1;\n"
+ " \tstruct tmem_hashbucket *hb;\n"
+ " \tbool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);\n"
+ "-\tbool lock_held = false;\n"
+ "+\tbool lock_held = 0;\n"
+ "+\tvoid **ppampd;\n"
+ " \n"
+ "+again:\n"
+ " \thb = &pool->hashbucket[tmem_oid_hash(oidp)];\n"
+ " \tspin_lock(&hb->lock);\n"
+ "-\tlock_held = true;\n"
+ "+\tlock_held = 1;\n"
+ " \tobj = tmem_obj_find(hb, oidp);\n"
+ " \tif (obj == NULL)\n"
+ " \t\tgoto out;\n"
+ "+\tppampd = __tmem_pampd_lookup_in_obj(obj, index);\n"
+ "+\tif (ppampd == NULL)\n"
+ "+\t\tgoto out;\n"
+ "+\tif (tmem_pamops.is_remote(*ppampd)) {\n"
+ "+\t\tret = tmem_repatriate(ppampd, hb, pool, oidp,\n"
+ "+\t\t\t\t\tindex, free, data);\n"
+ "+\t\tlock_held = 0; /* note hb->lock has been unlocked */\n"
+ "+\t\tif (ret == -EAGAIN) {\n"
+ "+\t\t\t/* rare I think, but should cond_resched()??? */\n"
+ "+\t\t\tusleep_range(10, 1000);\n"
+ "+\t\t\tgoto again;\n"
+ "+\t\t} else if (ret != 0) {\n"
+ "+#if 1\n"
+ "+\t\t\tif (ret != -ENOENT)\n"
+ "+\t\t\t\tpr_err(\"UNTESTED case in tmem_get, ret=%d\\n\",\n"
+ "+\t\t\t\t\t\tret);\n"
+ "+#endif\n"
+ "+\t\t\tret = -1;\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\t}\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ " \tif (free)\n"
+ " \t\tpampd = tmem_pampd_delete_from_obj(obj, index);\n"
+ " \telse\n"
+ "@@ -628,10 +715,6 @@ int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,\n"
+ " \t\t\tobj = NULL;\n"
+ " \t\t}\n"
+ " \t}\n"
+ "-\tif (tmem_pamops.is_remote(pampd)) {\n"
+ "-\t\tlock_held = false;\n"
+ "-\t\tspin_unlock(&hb->lock);\n"
+ "-\t}\n"
+ " \tif (free)\n"
+ " \t\tret = (*tmem_pamops.get_data_and_free)(\n"
+ " \t\t\t\tdata, size, raw, pampd, pool, oidp, index);\n"
+ "@@ -668,7 +751,7 @@ int tmem_flush_page(struct tmem_pool *pool,\n"
+ " \tpampd = tmem_pampd_delete_from_obj(obj, index);\n"
+ " \tif (pampd == NULL)\n"
+ " \t\tgoto out;\n"
+ "-\t(*tmem_pamops.free)(pampd, pool, oidp, index);\n"
+ "+\t(*tmem_pamops.free)(pampd, pool, oidp, index, true);\n"
+ " \tif (obj->pampd_count == 0) {\n"
+ " \t\ttmem_obj_free(obj, hb);\n"
+ " \t\t(*tmem_hostops.obj_free)(obj, pool);\n"
+ "@@ -682,8 +765,8 @@ out:\n"
+ " \n"
+ " /*\n"
+ "  * If a page in tmem matches the handle, replace the page so that any\n"
+ "- * subsequent \"get\" gets the new page.  Returns 0 if\n"
+ "- * there was a page to replace, else returns -1.\n"
+ "+ * subsequent \"get\" gets the new page.  Returns the new page if\n"
+ "+ * there was a page to replace, else returns NULL.\n"
+ "  */\n"
+ " int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,\n"
+ " \t\t\tuint32_t index, void *new_pampd)\n"
+ "@@ -697,7 +780,7 @@ int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,\n"
+ " \tobj = tmem_obj_find(hb, oidp);\n"
+ " \tif (obj == NULL)\n"
+ " \t\tgoto out;\n"
+ "-\tnew_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd);\n"
+ "+\tnew_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd, 0);\n"
+ " \tret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);\n"
+ " out:\n"
+ " \tspin_unlock(&hb->lock);\n"
+ "diff --git a/drivers/staging/ramster/tmem.h b/drivers/staging/ramster/tmem.h\n"
+ "index ed147c4..47f1918 100644\n"
+ "--- a/drivers/staging/ramster/tmem.h\n"
+ "+++ b/drivers/staging/ramster/tmem.h\n"
+ "@@ -9,7 +9,6 @@\n"
+ " #ifndef _TMEM_H_\n"
+ " #define _TMEM_H_\n"
+ " \n"
+ "-#include <linux/types.h>\n"
+ " #include <linux/highmem.h>\n"
+ " #include <linux/hash.h>\n"
+ " #include <linux/atomic.h>\n"
+ "@@ -89,6 +88,31 @@ struct tmem_oid {\n"
+ " \tuint64_t oid[3];\n"
+ " };\n"
+ " \n"
+ "+struct tmem_xhandle {\n"
+ "+\tuint8_t client_id;\n"
+ "+\tuint8_t xh_data_cksum;\n"
+ "+\tuint16_t xh_data_size;\n"
+ "+\tuint16_t pool_id;\n"
+ "+\tstruct tmem_oid oid;\n"
+ "+\tuint32_t index;\n"
+ "+\tvoid *extra;\n"
+ "+};\n"
+ "+\n"
+ "+static inline struct tmem_xhandle tmem_xhandle_fill(uint16_t client_id,\n"
+ "+\t\t\t\t\tstruct tmem_pool *pool,\n"
+ "+\t\t\t\t\tstruct tmem_oid *oidp,\n"
+ "+\t\t\t\t\tuint32_t index)\n"
+ "+{\n"
+ "+\tstruct tmem_xhandle xh;\n"
+ "+\txh.client_id = client_id;\n"
+ "+\txh.xh_data_cksum = (uint8_t)-1;\n"
+ "+\txh.xh_data_size = (uint16_t)-1;\n"
+ "+\txh.pool_id = pool->pool_id;\n"
+ "+\txh.oid = *oidp;\n"
+ "+\txh.index = index;\n"
+ "+\treturn xh;\n"
+ "+}\n"
+ "+\n"
+ " static inline void tmem_oid_set_invalid(struct tmem_oid *oidp)\n"
+ " {\n"
+ " \toidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;\n"
+ "@@ -147,7 +171,11 @@ struct tmem_obj {\n"
+ " \tunsigned int objnode_tree_height;\n"
+ " \tunsigned long objnode_count;\n"
+ " \tlong pampd_count;\n"
+ "-\tvoid *extra; /* for private use by pampd implementation */\n"
+ "+\t/* for current design of ramster, all pages belonging to\n"
+ "+\t * an object reside on the same remotenode and extra is\n"
+ "+\t * used to record the number of the remotenode so a\n"
+ "+\t * flush-object operation can specify it */\n"
+ "+\tvoid *extra; /* for use by pampd implementation */\n"
+ " \tDECL_SENTINEL\n"
+ " };\n"
+ " \n"
+ "@@ -174,9 +202,14 @@ struct tmem_pamops {\n"
+ " \tint (*get_data_and_free)(char *, size_t *, bool, void *,\n"
+ " \t\t\t\tstruct tmem_pool *, struct tmem_oid *,\n"
+ " \t\t\t\tuint32_t);\n"
+ "-\tvoid (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t);\n"
+ "+\tvoid (*free)(void *, struct tmem_pool *,\n"
+ "+\t\t\t\tstruct tmem_oid *, uint32_t, bool);\n"
+ " \tvoid (*free_obj)(struct tmem_pool *, struct tmem_obj *);\n"
+ " \tbool (*is_remote)(void *);\n"
+ "+\tvoid *(*repatriate_preload)(void *, struct tmem_pool *,\n"
+ "+\t\t\t\t\tstruct tmem_oid *, uint32_t, bool *);\n"
+ "+\tint (*repatriate)(void *, void *, struct tmem_pool *,\n"
+ "+\t\t\t\tstruct tmem_oid *, uint32_t, bool, void *);\n"
+ " \tvoid (*new_obj)(struct tmem_obj *);\n"
+ " \tint (*replace_in_obj)(void *, struct tmem_obj *);\n"
+ " };\n"
+ "@@ -193,11 +226,16 @@ extern void tmem_register_hostops(struct tmem_hostops *m);\n"
+ " \n"
+ " /* core tmem accessor functions */\n"
+ " extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,\n"
+ "-\t\t\tchar *, size_t, bool, bool);\n"
+ "+\t\t\tchar *, size_t, bool, int);\n"
+ " extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,\n"
+ " \t\t\tchar *, size_t *, bool, int);\n"
+ " extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,\n"
+ " \t\t\tvoid *);\n"
+ "+extern void *tmem_localify_get_pampd(struct tmem_pool *, struct tmem_oid *,\n"
+ "+\t\t\t\t   uint32_t index, struct tmem_obj **,\n"
+ "+\t\t\t\t   void **);\n"
+ "+extern void tmem_localify_finish(struct tmem_obj *, uint32_t index,\n"
+ "+\t\t\t\t void *, void *, bool);\n"
+ " extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,\n"
+ " \t\t\tuint32_t index);\n"
+ " extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);\n"
+ "diff --git a/drivers/staging/ramster/zcache-main.c b/drivers/staging/ramster/zcache-main.c\n"
+ "index cd0ed84..92fc7b2 100644\n"
+ "--- a/drivers/staging/ramster/zcache-main.c\n"
+ "+++ b/drivers/staging/ramster/zcache-main.c\n"
+ "@@ -1,7 +1,7 @@\n"
+ " /*\n"
+ "  * zcache.c\n"
+ "  *\n"
+ "- * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp.\n"
+ "+ * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.\n"
+ "  * Copyright (c) 2010,2011, Nitin Gupta\n"
+ "  *\n"
+ "  * Zcache provides an in-kernel \"host implementation\" for transcendent memory\n"
+ "@@ -17,9 +17,11 @@\n"
+ "  *\n"
+ "  * [1] For a definition of page-accessible memory (aka PAM), see:\n"
+ "  *   http://marc.info/?l=linux-mm&m=127811271605009\n"
+ "+ *  RAMSTER TODO:\n"
+ "+ *   - handle remotifying of buddied pages (see zbud_remotify_zbpg)\n"
+ "+ *   - kernel boot params: nocleancache/nofrontswap don't always work?!?\n"
+ "  */\n"
+ " \n"
+ "-#include <linux/module.h>\n"
+ " #include <linux/cpu.h>\n"
+ " #include <linux/highmem.h>\n"
+ " #include <linux/list.h>\n"
+ "@@ -30,11 +32,13 @@\n"
+ " #include <linux/atomic.h>\n"
+ " #include <linux/math64.h>\n"
+ " #include \"tmem.h\"\n"
+ "+#include \"zcache.h\"\n"
+ "+#include \"ramster.h\"\n"
+ " \n"
+ " #include \"../zram/xvmalloc.h\" /* if built in drivers/staging */\n"
+ " \n"
+ " #if (!defined(CONFIG_CLEANCACHE) && !defined(CONFIG_FRONTSWAP))\n"
+ "-#error \"zcache is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP\"\n"
+ "+#error \"ramster is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP\"\n"
+ " #endif\n"
+ " #ifdef CONFIG_CLEANCACHE\n"
+ " #include <linux/cleancache.h>\n"
+ "@@ -43,6 +47,61 @@\n"
+ " #include <linux/frontswap.h>\n"
+ " #endif\n"
+ " \n"
+ "+enum ramster_remotify_op {\n"
+ "+\tRAMSTER_REMOTIFY_EPH_PUT,\n"
+ "+\tRAMSTER_REMOTIFY_PERS_PUT,\n"
+ "+\tRAMSTER_REMOTIFY_FLUSH_PAGE,\n"
+ "+\tRAMSTER_REMOTIFY_FLUSH_OBJ,\n"
+ "+\tRAMSTER_INTRANSIT_PERS\n"
+ "+};\n"
+ "+\n"
+ "+struct ramster_remotify_hdr {\n"
+ "+\tenum ramster_remotify_op op;\n"
+ "+\tstruct list_head list;\n"
+ "+};\n"
+ "+\n"
+ "+#define ZBH_SENTINEL  0x43214321\n"
+ "+#define ZBPG_SENTINEL  0xdeadbeef\n"
+ "+\n"
+ "+#define ZBUD_MAX_BUDS 2\n"
+ "+\n"
+ "+struct zbud_hdr {\n"
+ "+\tstruct ramster_remotify_hdr rem_op;\n"
+ "+\tuint16_t client_id;\n"
+ "+\tuint16_t pool_id;\n"
+ "+\tstruct tmem_oid oid;\n"
+ "+\tuint32_t index;\n"
+ "+\tuint16_t size; /* compressed size in bytes, zero means unused */\n"
+ "+\tDECL_SENTINEL\n"
+ "+};\n"
+ "+\n"
+ "+#define ZVH_SENTINEL  0x43214321\n"
+ "+static const int zv_max_page_size = (PAGE_SIZE / 8) * 7;\n"
+ "+\n"
+ "+struct zv_hdr {\n"
+ "+\tstruct ramster_remotify_hdr rem_op;\n"
+ "+\tuint16_t client_id;\n"
+ "+\tuint16_t pool_id;\n"
+ "+\tstruct tmem_oid oid;\n"
+ "+\tuint32_t index;\n"
+ "+\tDECL_SENTINEL\n"
+ "+};\n"
+ "+\n"
+ "+struct flushlist_node {\n"
+ "+\tstruct ramster_remotify_hdr rem_op;\n"
+ "+\tstruct tmem_xhandle xh;\n"
+ "+};\n"
+ "+\n"
+ "+union {\n"
+ "+\tstruct ramster_remotify_hdr rem_op;\n"
+ "+\tstruct zv_hdr zv;\n"
+ "+\tstruct zbud_hdr zbud;\n"
+ "+\tstruct flushlist_node flist;\n"
+ "+} remotify_list_node;\n"
+ "+\n"
+ "+static LIST_HEAD(zcache_rem_op_list);\n"
+ "+static DEFINE_SPINLOCK(zcache_rem_op_list_lock);\n"
+ "+\n"
+ " #if 0\n"
+ " /* this is more aggressive but may cause other problems? */\n"
+ " #define ZCACHE_GFP_MASK\t(GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN)\n"
+ "@@ -98,20 +157,6 @@ static inline bool is_local_client(struct zcache_client *cli)\n"
+ "  * read or written unless the zbpg's lock is held.\n"
+ "  */\n"
+ " \n"
+ "-#define ZBH_SENTINEL  0x43214321\n"
+ "-#define ZBPG_SENTINEL  0xdeadbeef\n"
+ "-\n"
+ "-#define ZBUD_MAX_BUDS 2\n"
+ "-\n"
+ "-struct zbud_hdr {\n"
+ "-\tuint16_t client_id;\n"
+ "-\tuint16_t pool_id;\n"
+ "-\tstruct tmem_oid oid;\n"
+ "-\tuint32_t index;\n"
+ "-\tuint16_t size; /* compressed size in bytes, zero means unused */\n"
+ "-\tDECL_SENTINEL\n"
+ "-};\n"
+ "-\n"
+ " struct zbud_page {\n"
+ " \tstruct list_head bud_list;\n"
+ " \tspinlock_t lock;\n"
+ "@@ -153,8 +198,37 @@ static unsigned long zcache_zbud_curr_zbytes;\n"
+ " static unsigned long zcache_zbud_cumul_zpages;\n"
+ " static unsigned long zcache_zbud_cumul_zbytes;\n"
+ " static unsigned long zcache_compress_poor;\n"
+ "+static unsigned long zcache_policy_percent_exceeded;\n"
+ " static unsigned long zcache_mean_compress_poor;\n"
+ " \n"
+ "+/*\n"
+ "+ * RAMster counters\n"
+ "+ * - Remote pages are pages with a local pampd but the data is remote\n"
+ "+ * - Foreign pages are pages stored locally but belonging to another node\n"
+ "+ */\n"
+ "+static atomic_t ramster_remote_pers_pages = ATOMIC_INIT(0);\n"
+ "+static unsigned long ramster_pers_remotify_enable;\n"
+ "+static unsigned long ramster_eph_remotify_enable;\n"
+ "+static unsigned long ramster_eph_pages_remoted;\n"
+ "+static unsigned long ramster_eph_pages_remote_failed;\n"
+ "+static unsigned long ramster_pers_pages_remoted;\n"
+ "+static unsigned long ramster_pers_pages_remote_failed;\n"
+ "+static unsigned long ramster_pers_pages_remote_nomem;\n"
+ "+static unsigned long ramster_remote_objects_flushed;\n"
+ "+static unsigned long ramster_remote_object_flushes_failed;\n"
+ "+static unsigned long ramster_remote_pages_flushed;\n"
+ "+static unsigned long ramster_remote_page_flushes_failed;\n"
+ "+static unsigned long ramster_remote_eph_pages_succ_get;\n"
+ "+static unsigned long ramster_remote_pers_pages_succ_get;\n"
+ "+static unsigned long ramster_remote_eph_pages_unsucc_get;\n"
+ "+static unsigned long ramster_remote_pers_pages_unsucc_get;\n"
+ "+static atomic_t ramster_curr_flnode_count = ATOMIC_INIT(0);\n"
+ "+static unsigned long ramster_curr_flnode_count_max;\n"
+ "+static atomic_t ramster_foreign_eph_pampd_count = ATOMIC_INIT(0);\n"
+ "+static unsigned long ramster_foreign_eph_pampd_count_max;\n"
+ "+static atomic_t ramster_foreign_pers_pampd_count = ATOMIC_INIT(0);\n"
+ "+static unsigned long ramster_foreign_pers_pampd_count_max;\n"
+ "+\n"
+ " /* forward references */\n"
+ " static void *zcache_get_free_page(void);\n"
+ " static void zcache_free_page(void *p);\n"
+ "@@ -210,6 +284,29 @@ static char *zbud_data(struct zbud_hdr *zh, unsigned size)\n"
+ " \treturn p;\n"
+ " }\n"
+ " \n"
+ "+static void zbud_copy_from_pampd(char *data, size_t *size, struct zbud_hdr *zh)\n"
+ "+{\n"
+ "+\tstruct zbud_page *zbpg;\n"
+ "+\tchar *p;\n"
+ "+\tunsigned budnum;\n"
+ "+\n"
+ "+\tASSERT_SENTINEL(zh, ZBH);\n"
+ "+\tbudnum = zbud_budnum(zh);\n"
+ "+\tzbpg = container_of(zh, struct zbud_page, buddy[budnum]);\n"
+ "+\tspin_lock(&zbpg->lock);\n"
+ "+\tBUG_ON(zh->size > *size);\n"
+ "+\tp = (char *)zbpg;\n"
+ "+\tif (budnum == 0)\n"
+ "+\t\tp += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) &\n"
+ "+\t\t\t\t\t\t\tCHUNK_MASK);\n"
+ "+\telse if (budnum == 1)\n"
+ "+\t\tp += PAGE_SIZE - ((zh->size + CHUNK_SIZE - 1) & CHUNK_MASK);\n"
+ "+\t/* client should be filled in by caller */\n"
+ "+\tmemcpy(data, p, zh->size);\n"
+ "+\t*size = zh->size;\n"
+ "+\tspin_unlock(&zbpg->lock);\n"
+ "+}\n"
+ "+\n"
+ " /*\n"
+ "  * zbud raw page management\n"
+ "  */\n"
+ "@@ -299,6 +396,7 @@ static void zbud_free_and_delist(struct zbud_hdr *zh)\n"
+ " \tstruct zbud_page *zbpg =\n"
+ " \t\tcontainer_of(zh, struct zbud_page, buddy[budnum]);\n"
+ " \n"
+ "+\tBUG_ON(!irqs_disabled());\n"
+ " \tspin_lock(&zbpg->lock);\n"
+ " \tif (list_empty(&zbpg->bud_list)) {\n"
+ " \t\t/* ignore zombie page... see zbud_evict_pages() */\n"
+ "@@ -358,8 +456,13 @@ static struct zbud_hdr *zbud_create(uint16_t client_id, uint16_t pool_id,\n"
+ " \tif (unlikely(zbpg == NULL))\n"
+ " \t\tgoto out;\n"
+ " \t/* ok, have a page, now compress the data before taking locks */\n"
+ "+#if 1 /* 110721 FIX LOCK ORDERING TO ELIMINATE DEADLOCK */\n"
+ "+\tspin_lock(&zbud_budlists_spinlock);\n"
+ "+\tspin_lock(&zbpg->lock);\n"
+ "+#else\n"
+ " \tspin_lock(&zbpg->lock);\n"
+ " \tspin_lock(&zbud_budlists_spinlock);\n"
+ "+#endif\n"
+ " \tlist_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list);\n"
+ " \tzbud_unbuddied[nchunks].count++;\n"
+ " \tzh = &zbpg->buddy[0];\n"
+ "@@ -389,12 +492,19 @@ init_zh:\n"
+ " \tzh->oid = *oid;\n"
+ " \tzh->pool_id = pool_id;\n"
+ " \tzh->client_id = client_id;\n"
+ "+#if 1 /* 110721 FIX LOCK ORDERING TO ELIMINATE DEADLOCK */\n"
+ "+\tto = zbud_data(zh, size);\n"
+ "+\tmemcpy(to, cdata, size);\n"
+ "+\tspin_unlock(&zbpg->lock);\n"
+ "+\tspin_unlock(&zbud_budlists_spinlock);\n"
+ "+#else\n"
+ " \t/* can wait to copy the data until the list locks are dropped */\n"
+ " \tspin_unlock(&zbud_budlists_spinlock);\n"
+ " \n"
+ " \tto = zbud_data(zh, size);\n"
+ " \tmemcpy(to, cdata, size);\n"
+ " \tspin_unlock(&zbpg->lock);\n"
+ "+#endif\n"
+ " \tzbud_cumul_chunk_counts[nchunks]++;\n"
+ " \tatomic_inc(&zcache_zbud_curr_zpages);\n"
+ " \tzcache_zbud_cumul_zpages++;\n"
+ "@@ -458,6 +568,7 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg)\n"
+ " \tuint32_t index[ZBUD_MAX_BUDS];\n"
+ " \tstruct tmem_oid oid[ZBUD_MAX_BUDS];\n"
+ " \tstruct tmem_pool *pool;\n"
+ "+\tunsigned long flags;\n"
+ " \n"
+ " \tASSERT_SPINLOCK(&zbpg->lock);\n"
+ " \tBUG_ON(!list_empty(&zbpg->bud_list));\n"
+ "@@ -474,9 +585,12 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg)\n"
+ " \t}\n"
+ " \tspin_unlock(&zbpg->lock);\n"
+ " \tfor (i = 0; i < j; i++) {\n"
+ "+\t\t/* FIXME FIXME this just evicts local ephemeral pages!!! */\n"
+ " \t\tpool = zcache_get_pool_by_id(client_id[i], pool_id[i]);\n"
+ " \t\tif (pool != NULL) {\n"
+ "+\t\t\tlocal_irq_save(flags);\n"
+ " \t\t\ttmem_flush_page(pool, &oid[i], index[i]);\n"
+ "+\t\t\tlocal_irq_restore(flags);\n"
+ " \t\t\tzcache_put_pool(pool);\n"
+ " \t\t}\n"
+ " \t}\n"
+ "@@ -496,7 +610,7 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg)\n"
+ " static void zbud_evict_pages(int nr)\n"
+ " {\n"
+ " \tstruct zbud_page *zbpg;\n"
+ "-\tint i;\n"
+ "+\tint i, newly_unused_pages = 0;\n"
+ " \n"
+ " \t/* first try freeing any pages on unused list */\n"
+ " retry_unused_list:\n"
+ "@@ -512,7 +626,7 @@ retry_unused_list:\n"
+ " \t\tzcache_free_page(zbpg);\n"
+ " \t\tzcache_evicted_raw_pages++;\n"
+ " \t\tif (--nr <= 0)\n"
+ "-\t\t\tgoto out;\n"
+ "+\t\t\tgoto done;\n"
+ " \t\tgoto retry_unused_list;\n"
+ " \t}\n"
+ " \tspin_unlock_bh(&zbpg_unused_list_spinlock);\n"
+ "@@ -534,9 +648,10 @@ retry_unbud_list_i:\n"
+ " \t\t\tzcache_evicted_unbuddied_pages++;\n"
+ " \t\t\t/* want budlists unlocked when doing zbpg eviction */\n"
+ " \t\t\tzbud_evict_zbpg(zbpg);\n"
+ "+\t\t\tnewly_unused_pages++;\n"
+ " \t\t\tlocal_bh_enable();\n"
+ " \t\t\tif (--nr <= 0)\n"
+ "-\t\t\t\tgoto out;\n"
+ "+\t\t\t\tgoto evict_unused;\n"
+ " \t\t\tgoto retry_unbud_list_i;\n"
+ " \t\t}\n"
+ " \t\tspin_unlock_bh(&zbud_budlists_spinlock);\n"
+ "@@ -547,7 +662,7 @@ retry_bud_list:\n"
+ " \tspin_lock_bh(&zbud_budlists_spinlock);\n"
+ " \tif (list_empty(&zbud_buddied_list)) {\n"
+ " \t\tspin_unlock_bh(&zbud_budlists_spinlock);\n"
+ "-\t\tgoto out;\n"
+ "+\t\tgoto evict_unused;\n"
+ " \t}\n"
+ " \tlist_for_each_entry(zbpg, &zbud_buddied_list, bud_list) {\n"
+ " \t\tif (unlikely(!spin_trylock(&zbpg->lock)))\n"
+ "@@ -558,16 +673,362 @@ retry_bud_list:\n"
+ " \t\tzcache_evicted_buddied_pages++;\n"
+ " \t\t/* want budlists unlocked when doing zbpg eviction */\n"
+ " \t\tzbud_evict_zbpg(zbpg);\n"
+ "+\t\tnewly_unused_pages++;\n"
+ " \t\tlocal_bh_enable();\n"
+ " \t\tif (--nr <= 0)\n"
+ "-\t\t\tgoto out;\n"
+ "+\t\t\tgoto evict_unused;\n"
+ " \t\tgoto retry_bud_list;\n"
+ " \t}\n"
+ " \tspin_unlock_bh(&zbud_budlists_spinlock);\n"
+ "+\n"
+ "+evict_unused:\n"
+ "+\t/*\n"
+ "+\t * zbud_evict_zbpg just moves pages on the unused list, it doesn't\n"
+ "+\t * free them so we need to actually free them here.\n"
+ "+\t */\n"
+ "+\tspin_lock_bh(&zbpg_unused_list_spinlock);\n"
+ "+\tif (!list_empty(&zbpg_unused_list) && newly_unused_pages--) {\n"
+ "+\t\t/* can't walk list here, since it may change when unlocked */\n"
+ "+\t\tzbpg = list_first_entry(&zbpg_unused_list,\n"
+ "+\t\t\t\tstruct zbud_page, bud_list);\n"
+ "+\t\tlist_del_init(&zbpg->bud_list);\n"
+ "+\t\tzcache_zbpg_unused_list_count--;\n"
+ "+\t\tatomic_dec(&zcache_zbud_curr_raw_pages);\n"
+ "+\t\tspin_unlock_bh(&zbpg_unused_list_spinlock);\n"
+ "+\t\tzcache_free_page(zbpg);\n"
+ "+\t\tgoto evict_unused;\n"
+ "+\t}\n"
+ "+\tspin_unlock_bh(&zbpg_unused_list_spinlock);\n"
+ "+done:\n"
+ "+\treturn;\n"
+ "+}\n"
+ "+\n"
+ "+static DEFINE_PER_CPU(unsigned char *, zcache_remoteputmem);\n"
+ "+\n"
+ "+/* only doing unbuddied for now, so only one remote put per zbpg */\n"
+ "+static int zbud_remotify_zbpg(struct zbud_page *zbpg)\n"
+ "+{\n"
+ "+\tstruct zbud_hdr *zh;\n"
+ "+\tstruct tmem_xhandle xh;\n"
+ "+\tstruct tmem_pool *pool;\n"
+ "+\tbool ephemeral;\n"
+ "+\tchar *data;\n"
+ "+\tsize_t size;\n"
+ "+\tint remotenode, ret = -1;\n"
+ "+\tunsigned long flags;\n"
+ "+\tunsigned char cksum;\n"
+ "+\tchar *p;\n"
+ "+\tint i;\n"
+ "+\tunsigned char *tmpmem = __get_cpu_var(zcache_remoteputmem);\n"
+ "+\n"
+ "+\tASSERT_SPINLOCK(&zbpg->lock);\n"
+ "+\tBUG_ON(!list_empty(&zbpg->bud_list));\n"
+ "+\n"
+ "+\tif (zbpg->buddy[0].size == 0)\n"
+ "+\t\tzh = &zbpg->buddy[1];\n"
+ "+\telse if (zbpg->buddy[1].size == 0)\n"
+ "+\t\tzh = &zbpg->buddy[0];\n"
+ "+\telse\n"
+ "+\t\tBUG(); /* apparently NOT unbuddied ?!? */\n"
+ "+\n"
+ "+\t/* don't remotify pages that are already remotified */\n"
+ "+\tif (zh->client_id != LOCAL_CLIENT) {\n"
+ "+\t\tspin_unlock(&zbpg->lock);\n"
+ "+\t\tret = 0;\n"
+ "+\t\tpreempt_enable();\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+\txh.client_id = zh->client_id;\n"
+ "+\txh.pool_id = zh->pool_id;\n"
+ "+\txh.oid = zh->oid;\n"
+ "+\txh.index = zh->index;\n"
+ "+\tsize = zh->size;\n"
+ "+\tdata = zbud_data(zh, size);\n"
+ "+\tfor (p = data, cksum = 0, i = 0; i < size; i++)\n"
+ "+\t\tcksum += *p;\n"
+ "+\tmemcpy(tmpmem, data, size);\n"
+ "+\tdata = tmpmem;\n"
+ "+\tspin_unlock(&zbpg->lock);\n"
+ "+\tpreempt_enable();\t/* no locks held anymore */\n"
+ "+\tpool = zcache_get_pool_by_id(zh->client_id, zh->pool_id);\n"
+ "+\tBUG_ON(pool == NULL);\n"
+ "+\tephemeral = !pool->persistent;\n"
+ "+\tzcache_put_pool(pool);\n"
+ "+\tret = ramster_remote_put(&xh, data, size, ephemeral, &remotenode);\n"
+ "+\tif (ret == 0) {\n"
+ "+\t\t/* data was successfully remoted so change the local version\n"
+ "+\t\t * to point to the remote node where it landed */\n"
+ "+\t\tstruct tmem_pool *pool;\n"
+ "+\t\tpool = zcache_get_pool_by_id(LOCAL_CLIENT, xh.pool_id);\n"
+ "+\t\tBUG_ON(pool == NULL);\n"
+ "+\t\tlocal_irq_save(flags);\n"
+ "+\t\t(void)tmem_replace(pool, &xh.oid, xh.index,\n"
+ "+\t\t\tpampd_make_remote(remotenode, size, cksum));\n"
+ "+\t\tlocal_irq_restore(flags);\n"
+ "+\t\tzcache_put_pool(pool);\n"
+ "+\t\tramster_eph_pages_remoted++;\n"
+ "+\t\tret = 1;\n"
+ "+\t} else\n"
+ "+\t\tramster_eph_pages_remote_failed++;\n"
+ "+\n"
+ "+out:\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * zbud_remotify_pages - try to push up to nr unbuddied pages to a remote node\n"
+ "+ *\n"
+ "+ * For each unbuddied list, the first page whose zbpg->lock can be taken\n"
+ "+ * is delisted under zbud_budlists_spinlock and handed to\n"
+ "+ * zbud_remotify_zbpg(), which drops zbpg->lock and re-enables preemption.\n"
+ "+ * Stops after nr successful remotifies or on the first failed one.\n"
+ "+ */\n"
+ "+void zbud_remotify_pages(int nr)\n"
+ "+{\n"
+ "+\tstruct zbud_page *zbpg;\n"
+ "+\tint i, ret, found;\n"
+ "+\n"
+ "+\t/*\n"
+ "+\t * for now just try remotifying unbuddied pages, starting with\n"
+ "+\t * least space avail\n"
+ "+\t */\n"
+ "+\tfor (i = 0; i < MAX_CHUNK; i++) {\n"
+ "+retry_unbud_list_i:\n"
+ "+\t\tpreempt_disable();  /* enable in zbud_remotify_zbpg */\n"
+ "+\t\tspin_lock_bh(&zbud_budlists_spinlock);\n"
+ "+\t\tfound = 0;\n"
+ "+\t\tlist_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) {\n"
+ "+\t\t\t/* busy page: skip it, but keep the list lock held */\n"
+ "+\t\t\tif (likely(spin_trylock(&zbpg->lock))) {\n"
+ "+\t\t\t\tfound = 1;\n"
+ "+\t\t\t\tbreak;\n"
+ "+\t\t\t}\n"
+ "+\t\t}\n"
+ "+\t\tif (!found) {\n"
+ "+\t\t\t/*\n"
+ "+\t\t\t * list is empty or every page on it is busy; undo the\n"
+ "+\t\t\t * lock and the preempt_disable before trying next list\n"
+ "+\t\t\t */\n"
+ "+\t\t\tspin_unlock_bh(&zbud_budlists_spinlock);\n"
+ "+\t\t\tpreempt_enable();\n"
+ "+\t\t\tcontinue;\n"
+ "+\t\t}\n"
+ "+\t\tlist_del_init(&zbpg->bud_list);\n"
+ "+\t\tzbud_unbuddied[i].count--;\n"
+ "+\t\tspin_unlock(&zbud_budlists_spinlock);\n"
+ "+\t\t/* want budlists unlocked when doing zbpg remotify */\n"
+ "+\t\tlocal_bh_enable();\n"
+ "+\t\tret = zbud_remotify_zbpg(zbpg);\n"
+ "+\t\t/* preemption is now re-enabled */\n"
+ "+\t\t/*\n"
+ "+\t\t * NOTE(review): zbud_remotify_zbpg returns 0 for a page whose\n"
+ "+\t\t * client_id is not LOCAL_CLIENT; such a page has already been\n"
+ "+\t\t * delisted above and ends up here -- confirm BUG() is intended\n"
+ "+\t\t */\n"
+ "+\t\tif (ret == 0)\n"
+ "+\t\t\tBUG();\n"
+ "+\t\telse if (ret == 1)\n"
+ "+\t\t\t--nr;\n"
+ "+\t\telse {\n"
+ "+\t\t\t/* if fail to remotify any page, quit */\n"
+ "+pr_err(\"TESTING zbud_remotify_pages failed on page, trying to re-add\\n\");\n"
+ "+\t\t\tspin_lock_bh(&zbud_budlists_spinlock);\n"
+ "+\t\t\tspin_lock(&zbpg->lock);\n"
+ "+\t\t\tlist_add_tail(&zbpg->bud_list,\n"
+ "+\t\t\t\t&zbud_unbuddied[i].list);\n"
+ "+\t\t\tzbud_unbuddied[i].count++;\n"
+ "+\t\t\tspin_unlock(&zbpg->lock);\n"
+ "+\t\t\tspin_unlock_bh(&zbud_budlists_spinlock);\n"
+ "+pr_err(\"TESTING zbud_remotify_pages failed on page, finished re-add\\n\");\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\t}\n"
+ "+\t\tif (nr <= 0)\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\tgoto retry_unbud_list_i;\n"
+ "+\t}\n"
+ " out:\n"
+ " \treturn;\n"
+ " }\n"
+ " \n"
+ "+/* the \"flush list\" asynchronously collects pages to remotely flush */\n"
+ "+#define FLUSH_ENTIRE_OBJECT ((uint32_t)-1)\n"
+ "+static void ramster_flnode_free(struct flushlist_node *,\n"
+ "+\t\t\t\tstruct tmem_pool *);\n"
+ "+\n"
+ "+/*\n"
+ "+ * Perform one queued remote page flush described by flnode->xh.\n"
+ "+ * The result is counted in ramster_remote_pages_flushed or\n"
+ "+ * ramster_remote_page_flushes_failed, and flnode is always freed\n"
+ "+ * before returning.\n"
+ "+ */\n"
+ "+static void zcache_remote_flush_page(struct flushlist_node *flnode)\n"
+ "+{\n"
+ "+\tstruct tmem_xhandle *xh;\n"
+ "+\tint remotenode, ret;\n"
+ "+\n"
+ "+\tpreempt_disable();\n"
+ "+\txh = &flnode->xh;\n"
+ "+\t/* for flush nodes, xh.client_id holds the target remote node */\n"
+ "+\tremotenode = flnode->xh.client_id;\n"
+ "+\tret = ramster_remote_flush(xh, remotenode);\n"
+ "+\tif (ret >= 0)\n"
+ "+\t\tramster_remote_pages_flushed++;\n"
+ "+\telse\n"
+ "+\t\tramster_remote_page_flushes_failed++;\n"
+ "+\tpreempt_enable_no_resched();\n"
+ "+\t/* the pool argument is not used by ramster_flnode_free */\n"
+ "+\tramster_flnode_free(flnode, NULL);\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Perform one queued remote object flush described by flnode->xh.\n"
+ "+ * The result is counted in ramster_remote_objects_flushed or\n"
+ "+ * ramster_remote_object_flushes_failed, and flnode is always freed\n"
+ "+ * before returning.\n"
+ "+ */\n"
+ "+static void zcache_remote_flush_object(struct flushlist_node *flnode)\n"
+ "+{\n"
+ "+\tstruct tmem_xhandle *xh;\n"
+ "+\tint remotenode, ret;\n"
+ "+\n"
+ "+\tpreempt_disable();\n"
+ "+\txh = &flnode->xh;\n"
+ "+\t/* for flush nodes, xh.client_id holds the target remote node */\n"
+ "+\tremotenode = flnode->xh.client_id;\n"
+ "+\tret = ramster_remote_flush_object(xh, remotenode);\n"
+ "+\tif (ret >= 0)\n"
+ "+\t\tramster_remote_objects_flushed++;\n"
+ "+\telse\n"
+ "+\t\tramster_remote_object_flushes_failed++;\n"
+ "+\tpreempt_enable_no_resched();\n"
+ "+\t/* the pool argument is not used by ramster_flnode_free */\n"
+ "+\tramster_flnode_free(flnode, NULL);\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Placeholder for remotifying an ephemeral page from the remote op list.\n"
+ "+ * Not implemented: zcache_do_remotify_ops() BUG()s before calling it.\n"
+ "+ */\n"
+ "+static void zcache_remote_eph_put(struct zbud_hdr *zbud)\n"
+ "+{\n"
+ "+\t/* FIXME */\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Push one local persistent page (zv) to a remote node and, on success,\n"
+ "+ * replace the local pampd with a descriptor of the remote copy.\n"
+ "+ *\n"
+ "+ * Called with zcache_rem_op_list_lock held (taken in\n"
+ "+ * zcache_do_remotify_ops); the lock is released here once the data has\n"
+ "+ * been copied into the per-cpu staging buffer.  A failed remote put is\n"
+ "+ * only counted; the page is not retried.\n"
+ "+ */\n"
+ "+static void zcache_remote_pers_put(struct zv_hdr *zv)\n"
+ "+{\n"
+ "+\tstruct tmem_xhandle xh;\n"
+ "+\tuint16_t size;\n"
+ "+\tbool ephemeral;\n"
+ "+\tint remotenode, ret = -1;\n"
+ "+\tchar *data;\n"
+ "+\tstruct tmem_pool *pool;\n"
+ "+\tunsigned long flags;\n"
+ "+\tunsigned char cksum;\n"
+ "+\tchar *p;\n"
+ "+\tint i;\n"
+ "+\tunsigned char *tmpmem = __get_cpu_var(zcache_remoteputmem);\n"
+ "+\n"
+ "+\tASSERT_SENTINEL(zv, ZVH);\n"
+ "+\tBUG_ON(zv->client_id != LOCAL_CLIENT);\n"
+ "+\tlocal_bh_disable();\n"
+ "+\txh.client_id = zv->client_id;\n"
+ "+\txh.pool_id = zv->pool_id;\n"
+ "+\txh.oid = zv->oid;\n"
+ "+\txh.index = zv->index;\n"
+ "+\tsize = xv_get_object_size(zv) - sizeof(*zv);\n"
+ "+\tBUG_ON(size == 0 || size > zv_max_page_size);\n"
+ "+\tdata = (char *)zv + sizeof(*zv);\n"
+ "+\t/*\n"
+ "+\t * NOTE(review): p is never advanced, so cksum is size * data[0].\n"
+ "+\t * Left as-is because the code that checks the stored cksum must\n"
+ "+\t * compute it the same way -- fix both sides together.\n"
+ "+\t */\n"
+ "+\tfor (p = data, cksum = 0, i = 0; i < size; i++)\n"
+ "+\t\tcksum += *p;\n"
+ "+\t/* stage the data so zv need not be referenced after unlock */\n"
+ "+\tmemcpy(tmpmem, data, size);\n"
+ "+\tdata = tmpmem;\n"
+ "+\tpool = zcache_get_pool_by_id(zv->client_id, zv->pool_id);\n"
+ "+\tBUG_ON(pool == NULL);\n"
+ "+\tephemeral = is_ephemeral(pool);\n"
+ "+\tzcache_put_pool(pool);\n"
+ "+\t/*\n"
+ "+\t * disable preemption before dropping the lock so there is no\n"
+ "+\t * preemptible window while the per-cpu tmpmem buffer is in use\n"
+ "+\t */\n"
+ "+\tpreempt_disable();\n"
+ "+\t/* now OK to release lock set in caller */\n"
+ "+\tspin_unlock(&zcache_rem_op_list_lock);\n"
+ "+\tlocal_bh_enable();\n"
+ "+\tret = ramster_remote_put(&xh, data, size, ephemeral, &remotenode);\n"
+ "+\tpreempt_enable_no_resched();\n"
+ "+\tif (ret != 0) {\n"
+ "+\t\t/*\n"
+ "+\t\t * This is some form of a memory leak... if the remote put\n"
+ "+\t\t * fails, there will never be another attempt to remotify\n"
+ "+\t\t * this page.  But since we've dropped the zv pointer,\n"
+ "+\t\t * the page may have been freed or the data replaced\n"
+ "+\t\t * so we can't just \"put it back\" in the remote op list.\n"
+ "+\t\t * Even if we could, not sure where to put it in the list\n"
+ "+\t\t * because there may be flushes that must be strictly\n"
+ "+\t\t * ordered vs the put.  So leave this as a FIXME for now.\n"
+ "+\t\t * But count them so we know if it becomes a problem.\n"
+ "+\t\t */\n"
+ "+\t\tramster_pers_pages_remote_failed++;\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+\tatomic_inc(&ramster_remote_pers_pages);\n"
+ "+\tramster_pers_pages_remoted++;\n"
+ "+\t/*\n"
+ "+\t * data was successfully remoted so change the local version to\n"
+ "+\t * point to the remote node where it landed\n"
+ "+\t */\n"
+ "+\tlocal_bh_disable();\n"
+ "+\tpool = zcache_get_pool_by_id(LOCAL_CLIENT, xh.pool_id);\n"
+ "+\tBUG_ON(pool == NULL);\n"
+ "+\tlocal_irq_save(flags);\n"
+ "+\t(void)tmem_replace(pool, &xh.oid, xh.index,\n"
+ "+\t\t\tpampd_make_remote(remotenode, size, cksum));\n"
+ "+\tlocal_irq_restore(flags);\n"
+ "+\tzcache_put_pool(pool);\n"
+ "+\tlocal_bh_enable();\n"
+ "+out:\n"
+ "+\treturn;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Process queued remote operations from the head of zcache_rem_op_list,\n"
+ "+ * in list order, until the list is empty or nr operations have been\n"
+ "+ * handled.  nr == 0 does nothing; a negative nr means no limit.\n"
+ "+ *\n"
+ "+ * For RAMSTER_REMOTIFY_PERS_PUT the list lock stays held across the call\n"
+ "+ * and is released inside zcache_remote_pers_put(); for every other op it\n"
+ "+ * is released here before dispatch.\n"
+ "+ */\n"
+ "+static void zcache_do_remotify_ops(int nr)\n"
+ "+{\n"
+ "+\tstruct ramster_remotify_hdr *rem_op;\n"
+ "+\tunion remotify_list_node *u;\n"
+ "+\n"
+ "+\twhile (1) {\n"
+ "+\t\tif (!nr)\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\tspin_lock(&zcache_rem_op_list_lock);\n"
+ "+\t\tif (list_empty(&zcache_rem_op_list)) {\n"
+ "+\t\t\tspin_unlock(&zcache_rem_op_list_lock);\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\t}\n"
+ "+\t\trem_op = list_first_entry(&zcache_rem_op_list,\n"
+ "+\t\t\t\tstruct ramster_remotify_hdr, list);\n"
+ "+\t\tlist_del_init(&rem_op->list);\n"
+ "+\t\tif (rem_op->op != RAMSTER_REMOTIFY_PERS_PUT)\n"
+ "+\t\t\tspin_unlock(&zcache_rem_op_list_lock);\n"
+ "+\t\tu = (union remotify_list_node *)rem_op;\n"
+ "+\t\tswitch (rem_op->op) {\n"
+ "+\t\tcase RAMSTER_REMOTIFY_EPH_PUT:\n"
+ "+BUG();\n"
+ "+\t\t\tzcache_remote_eph_put((struct zbud_hdr *)rem_op);\n"
+ "+\t\t\tbreak;\n"
+ "+\t\tcase RAMSTER_REMOTIFY_PERS_PUT:\n"
+ "+\t\t\tzcache_remote_pers_put((struct zv_hdr *)rem_op);\n"
+ "+\t\t\tbreak;\n"
+ "+\t\tcase RAMSTER_REMOTIFY_FLUSH_PAGE:\n"
+ "+\t\t\tzcache_remote_flush_page((struct flushlist_node *)u);\n"
+ "+\t\t\tbreak;\n"
+ "+\t\tcase RAMSTER_REMOTIFY_FLUSH_OBJ:\n"
+ "+\t\t\tzcache_remote_flush_object((struct flushlist_node *)u);\n"
+ "+\t\t\tbreak;\n"
+ "+\t\tdefault:\n"
+ "+\t\t\tBUG();\n"
+ "+\t\t}\n"
+ "+\t\t/* one op consumed; without this the nr limit never applies */\n"
+ "+\t\tif (nr > 0)\n"
+ "+\t\t\tnr--;\n"
+ "+\t}\n"
+ "+out:\n"
+ "+\treturn;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * For now, just push over a few pages every few seconds to\n"
+ "+ * ensure that it basically works\n"
+ "+ */\n"
+ "+static struct workqueue_struct *ramster_remotify_workqueue;\n"
+ "+static void ramster_remotify_process(struct work_struct *work);\n"
+ "+static DECLARE_DELAYED_WORK(ramster_remotify_worker,\n"
+ "+\t\tramster_remotify_process);\n"
+ "+\n"
+ "+/*\n"
+ "+ * Queue ramster_remotify_worker on ramster_remotify_workqueue to run\n"
+ "+ * after delay (in jiffies; callers pass multiples of HZ).  Logs an error\n"
+ "+ * if queue_delayed_work() returns zero.\n"
+ "+ */\n"
+ "+static void ramster_remotify_queue_delayed_work(unsigned long delay)\n"
+ "+{\n"
+ "+\tif (!queue_delayed_work(ramster_remotify_workqueue,\n"
+ "+\t\t\t\t&ramster_remotify_worker, delay))\n"
+ "+\t\tpr_err(\"ramster_remotify: bad workqueue\\n\");\n"
+ "+}\n"
+ "+\n"
+ "+\n"
+ "+static int use_frontswap;\n"
+ "+static int use_cleancache;\n"
+ "+/*\n"
+ "+ * Delayed-work handler: re-arms itself to run again in 10 seconds, then\n"
+ "+ * remotifies a batch of persistent ops (when frontswap is in use and\n"
+ "+ * ramster_pers_remotify_enable is set) and a batch of ephemeral pages\n"
+ "+ * (when cleancache is in use and ramster_eph_remotify_enable is set).\n"
+ "+ * Must be entered with irqs enabled.\n"
+ "+ */\n"
+ "+static void ramster_remotify_process(struct work_struct *work)\n"
+ "+{\n"
+ "+\tBUG_ON(irqs_disabled());\n"
+ "+\t/* re-arm first so the next run is scheduled before this batch runs */\n"
+ "+\tramster_remotify_queue_delayed_work(10 * HZ);\n"
+ "+#ifdef CONFIG_FRONTSWAP\n"
+ "+\tif (use_frontswap && ramster_pers_remotify_enable)\n"
+ "+\t\tzcache_do_remotify_ops(500); /* FIXME is this a good number? */\n"
+ "+#endif\n"
+ "+#ifdef CONFIG_CLEANCACHE\n"
+ "+\tif (use_cleancache && ramster_eph_remotify_enable)\n"
+ "+\t\tzbud_remotify_pages(100);\n"
+ "+#endif\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Create the single-threaded remotify workqueue and schedule the first\n"
+ "+ * run of ramster_remotify_process() 60 seconds from now.  If the\n"
+ "+ * workqueue cannot be created, log it and leave remotification off\n"
+ "+ * rather than queueing work on a NULL workqueue.\n"
+ "+ */\n"
+ "+static void ramster_remotify_init(void)\n"
+ "+{\n"
+ "+\tunsigned long n = 60UL;\n"
+ "+\n"
+ "+\tramster_remotify_workqueue =\n"
+ "+\t\tcreate_singlethread_workqueue(\"ramster_remotify\");\n"
+ "+\tif (ramster_remotify_workqueue == NULL) {\n"
+ "+\t\tpr_err(\"ramster_remotify: cannot create workqueue\\n\");\n"
+ "+\t\treturn;\n"
+ "+\t}\n"
+ "+\tramster_remotify_queue_delayed_work(n * HZ);\n"
+ "+}\n"
+ "+\n"
+ "+\n"
+ " static void zbud_init(void)\n"
+ " {\n"
+ " \tint i;\n"
+ "@@ -631,15 +1092,6 @@ static int zbud_show_cumul_chunk_counts(char *buf)\n"
+ "  * necessary for decompression) immediately preceding the compressed data.\n"
+ "  */\n"
+ " \n"
+ "-#define ZVH_SENTINEL  0x43214321\n"
+ "-\n"
+ "-struct zv_hdr {\n"
+ "-\tuint32_t pool_id;\n"
+ "-\tstruct tmem_oid oid;\n"
+ "-\tuint32_t index;\n"
+ "-\tDECL_SENTINEL\n"
+ "-};\n"
+ "-\n"
+ " /* rudimentary policy limits */\n"
+ " /* total number of persistent pages may not exceed this percentage */\n"
+ " static unsigned int zv_page_count_policy_percent = 75;\n"
+ "@@ -658,7 +1110,7 @@ static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;\n"
+ " static unsigned long zv_curr_dist_counts[NCHUNKS];\n"
+ " static unsigned long zv_cumul_dist_counts[NCHUNKS];\n"
+ " \n"
+ "-static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,\n"
+ "+static struct zv_hdr *zv_create(struct zcache_client *cli, uint32_t pool_id,\n"
+ " \t\t\t\tstruct tmem_oid *oid, uint32_t index,\n"
+ " \t\t\t\tvoid *cdata, unsigned clen)\n"
+ " {\n"
+ "@@ -671,7 +1123,7 @@ static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,\n"
+ " \n"
+ " \tBUG_ON(!irqs_disabled());\n"
+ " \tBUG_ON(chunks >= NCHUNKS);\n"
+ "-\tret = xv_malloc(xvpool, alloc_size,\n"
+ "+\tret = xv_malloc(cli->xvpool, clen + sizeof(struct zv_hdr),\n"
+ " \t\t\t&page, &offset, ZCACHE_GFP_MASK);\n"
+ " \tif (unlikely(ret))\n"
+ " \t\tgoto out;\n"
+ "@@ -682,12 +1134,50 @@ static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,\n"
+ " \tzv->oid = *oid;\n"
+ " \tzv->pool_id = pool_id;\n"
+ " \tSET_SENTINEL(zv, ZVH);\n"
+ "+\tINIT_LIST_HEAD(&zv->rem_op.list);\n"
+ "+\tzv->client_id = get_client_id_from_client(cli);\n"
+ "+\tzv->rem_op.op = RAMSTER_REMOTIFY_PERS_PUT;\n"
+ "+\tif (zv->client_id == LOCAL_CLIENT) {\n"
+ "+\t\tspin_lock(&zcache_rem_op_list_lock);\n"
+ "+\t\tlist_add_tail(&zv->rem_op.list, &zcache_rem_op_list);\n"
+ "+\t\tspin_unlock(&zcache_rem_op_list_lock);\n"
+ "+\t}\n"
+ " \tmemcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen);\n"
+ " \tkunmap_atomic(zv, KM_USER0);\n"
+ " out:\n"
+ " \treturn zv;\n"
+ " }\n"
+ " \n"
+ "+/*\n"
+ "+ * similar to zv_create, but just reserve space, no data yet\n"
+ "+ *\n"
+ "+ * Allocates clen bytes plus a zv_hdr from the pool client's xvpool and\n"
+ "+ * fills in the header for (pool, oid, index).  The header is marked\n"
+ "+ * RAMSTER_INTRANSIT_PERS and is not added to zcache_rem_op_list.\n"
+ "+ * Must be called with irqs disabled and only for a local-client pool.\n"
+ "+ * Returns NULL if xv_malloc fails.\n"
+ "+ */\n"
+ "+static struct zv_hdr *zv_alloc(struct tmem_pool *pool,\n"
+ "+\t\t\t\tstruct tmem_oid *oid, uint32_t index,\n"
+ "+\t\t\t\tunsigned clen)\n"
+ "+{\n"
+ "+\tstruct zcache_client *cli = pool->client;\n"
+ "+\tstruct page *page;\n"
+ "+\tstruct zv_hdr *zv = NULL;\n"
+ "+\tuint32_t offset;\n"
+ "+\tint ret;\n"
+ "+\n"
+ "+\tBUG_ON(!irqs_disabled());\n"
+ "+\tBUG_ON(!is_local_client(pool->client));\n"
+ "+\tret = xv_malloc(cli->xvpool, clen + sizeof(struct zv_hdr),\n"
+ "+\t\t\t&page, &offset, ZCACHE_GFP_MASK);\n"
+ "+\tif (unlikely(ret))\n"
+ "+\t\tgoto out;\n"
+ "+\tzv = kmap_atomic(page, KM_USER0) + offset;\n"
+ "+\tSET_SENTINEL(zv, ZVH);\n"
+ "+\tINIT_LIST_HEAD(&zv->rem_op.list);\n"
+ "+\tzv->client_id = LOCAL_CLIENT;\n"
+ "+\tzv->rem_op.op = RAMSTER_INTRANSIT_PERS;\n"
+ "+\tzv->index = index;\n"
+ "+\tzv->oid = *oid;\n"
+ "+\tzv->pool_id = pool->pool_id;\n"
+ "+\t/*\n"
+ "+\t * NOTE(review): zv is returned after the atomic mapping is dropped,\n"
+ "+\t * same as zv_create -- presumably relies on the page being\n"
+ "+\t * permanently mapped; confirm\n"
+ "+\t */\n"
+ "+\tkunmap_atomic(zv, KM_USER0);\n"
+ "+out:\n"
+ "+\treturn zv;\n"
+ "+}\n"
+ "+\n"
+ " static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)\n"
+ " {\n"
+ " \tunsigned long flags;\n"
+ "@@ -700,8 +1190,13 @@ static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)\n"
+ " \tBUG_ON(chunks >= NCHUNKS);\n"
+ " \tzv_curr_dist_counts[chunks]--;\n"
+ " \tsize -= sizeof(*zv);\n"
+ "+\tspin_lock(&zcache_rem_op_list_lock);\n"
+ "+\tsize = xv_get_object_size(zv) - sizeof(*zv);\n"
+ " \tBUG_ON(size == 0);\n"
+ " \tINVERT_SENTINEL(zv, ZVH);\n"
+ "+\tif (!list_empty(&zv->rem_op.list))\n"
+ "+\t\tlist_del_init(&zv->rem_op.list);\n"
+ "+\tspin_unlock(&zcache_rem_op_list_lock);\n"
+ " \tpage = virt_to_page(zv);\n"
+ " \toffset = (unsigned long)zv & ~PAGE_MASK;\n"
+ " \tlocal_irq_save(flags);\n"
+ "@@ -727,6 +1222,29 @@ static void zv_decompress(struct page *page, struct zv_hdr *zv)\n"
+ " \tBUG_ON(clen != PAGE_SIZE);\n"
+ " }\n"
+ " \n"
+ "+/*\n"
+ "+ * Copy the payload bytes that follow the zv header into data.\n"
+ "+ * On entry *bufsize is the capacity of data; on return it is the number\n"
+ "+ * of bytes copied.  BUG()s if the payload is empty, larger than\n"
+ "+ * zv_max_page_size, or larger than the buffer.\n"
+ "+ */\n"
+ "+static void zv_copy_from_pampd(char *data, size_t *bufsize, struct zv_hdr *zv)\n"
+ "+{\n"
+ "+\tunsigned payload_len;\n"
+ "+\tchar *payload;\n"
+ "+\n"
+ "+\tASSERT_SENTINEL(zv, ZVH);\n"
+ "+\tpayload_len = xv_get_object_size(zv) - sizeof(*zv);\n"
+ "+\tBUG_ON(payload_len == 0 || payload_len > zv_max_page_size);\n"
+ "+\tBUG_ON(payload_len > *bufsize);\n"
+ "+\tpayload = (char *)zv + sizeof(*zv);\n"
+ "+\tmemcpy(data, payload, payload_len);\n"
+ "+\t*bufsize = payload_len;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Fill the payload area that follows the zv header with size bytes from\n"
+ "+ * data.  BUG()s unless size matches the space reserved for this zv\n"
+ "+ * exactly and that space is non-empty and at most zv_max_page_size.\n"
+ "+ */\n"
+ "+static void zv_copy_to_pampd(struct zv_hdr *zv, char *data, size_t size)\n"
+ "+{\n"
+ "+\tunsigned reserved;\n"
+ "+\tchar *payload;\n"
+ "+\n"
+ "+\tASSERT_SENTINEL(zv, ZVH);\n"
+ "+\treserved = xv_get_object_size(zv) - sizeof(*zv);\n"
+ "+\tBUG_ON(reserved != size);\n"
+ "+\tBUG_ON(reserved == 0 || reserved > zv_max_page_size);\n"
+ "+\tpayload = (char *)zv + sizeof(*zv);\n"
+ "+\tmemcpy(payload, data, size);\n"
+ "+}\n"
+ "+\n"
+ " #ifdef CONFIG_SYSFS\n"
+ " /*\n"
+ "  * show a distribution of compression stats for zv pages.\n"
+ "@@ -979,6 +1497,7 @@ static DEFINE_SPINLOCK(zcache_direct_reclaim_lock);\n"
+ "  */\n"
+ " static struct kmem_cache *zcache_objnode_cache;\n"
+ " static struct kmem_cache *zcache_obj_cache;\n"
+ "+static struct kmem_cache *ramster_flnode_cache;\n"
+ " static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0);\n"
+ " static unsigned long zcache_curr_obj_count_max;\n"
+ " static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0);\n"
+ "@@ -994,6 +1513,7 @@ struct zcache_preload {\n"
+ " \tstruct tmem_obj *obj;\n"
+ " \tint nr;\n"
+ " \tstruct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH];\n"
+ "+\tstruct flushlist_node *flnode;\n"
+ " };\n"
+ " static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };\n"
+ " \n"
+ "@@ -1002,6 +1522,7 @@ static int zcache_do_preload(struct tmem_pool *pool)\n"
+ " \tstruct zcache_preload *kp;\n"
+ " \tstruct tmem_objnode *objnode;\n"
+ " \tstruct tmem_obj *obj;\n"
+ "+\tstruct flushlist_node *flnode;\n"
+ " \tvoid *page;\n"
+ " \tint ret = -ENOMEM;\n"
+ " \n"
+ "@@ -1009,10 +1530,6 @@ static int zcache_do_preload(struct tmem_pool *pool)\n"
+ " \t\tgoto out;\n"
+ " \tif (unlikely(zcache_obj_cache == NULL))\n"
+ " \t\tgoto out;\n"
+ "-\tif (!spin_trylock(&zcache_direct_reclaim_lock)) {\n"
+ "-\t\tzcache_aborted_preload++;\n"
+ "-\t\tgoto out;\n"
+ "-\t}\n"
+ " \tpreempt_disable();\n"
+ " \tkp = &__get_cpu_var(zcache_preloads);\n"
+ " \twhile (kp->nr < ARRAY_SIZE(kp->objnodes)) {\n"
+ "@@ -1036,6 +1553,11 @@ static int zcache_do_preload(struct tmem_pool *pool)\n"
+ " \t\tzcache_failed_alloc++;\n"
+ " \t\tgoto unlock_out;\n"
+ " \t}\n"
+ "+\tflnode = kmem_cache_alloc(ramster_flnode_cache, ZCACHE_GFP_MASK);\n"
+ "+\tif (unlikely(flnode == NULL)) {\n"
+ "+\t\tzcache_failed_alloc++;\n"
+ "+\t\tgoto unlock_out;\n"
+ "+\t}\n"
+ " \tpage = (void *)__get_free_page(ZCACHE_GFP_MASK);\n"
+ " \tif (unlikely(page == NULL)) {\n"
+ " \t\tzcache_failed_get_free_pages++;\n"
+ "@@ -1048,17 +1570,40 @@ static int zcache_do_preload(struct tmem_pool *pool)\n"
+ " \t\tkp->obj = obj;\n"
+ " \telse\n"
+ " \t\tkmem_cache_free(zcache_obj_cache, obj);\n"
+ "+\tif (kp->flnode == NULL)\n"
+ "+\t\tkp->flnode = flnode;\n"
+ "+\telse\n"
+ "+\t\tkmem_cache_free(ramster_flnode_cache, flnode);\n"
+ " \tif (kp->page == NULL)\n"
+ " \t\tkp->page = page;\n"
+ " \telse\n"
+ " \t\tfree_page((unsigned long)page);\n"
+ " \tret = 0;\n"
+ " unlock_out:\n"
+ "-\tspin_unlock(&zcache_direct_reclaim_lock);\n"
+ " out:\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "+/*\n"
+ "+ * Make sure this cpu's preload holds a flushlist_node for a later\n"
+ "+ * ramster_flnode_alloc().  Must be called with irqs disabled.\n"
+ "+ * Returns 0 once kp->flnode is populated; BUG()s if the cache does not\n"
+ "+ * exist or no node can be allocated.  The pool argument is unused.\n"
+ "+ */\n"
+ "+static int ramster_do_preload_flnode_only(struct tmem_pool *pool)\n"
+ "+{\n"
+ "+\tstruct zcache_preload *kp;\n"
+ "+\tstruct flushlist_node *flnode;\n"
+ "+\n"
+ "+\tBUG_ON(!irqs_disabled());\n"
+ "+\tif (unlikely(ramster_flnode_cache == NULL))\n"
+ "+\t\tBUG();\n"
+ "+\tkp = &__get_cpu_var(zcache_preloads);\n"
+ "+\t/* already preloaded: nothing to allocate and nothing to free */\n"
+ "+\tif (kp->flnode != NULL)\n"
+ "+\t\treturn 0;\n"
+ "+\tflnode = kmem_cache_alloc(ramster_flnode_cache, GFP_ATOMIC);\n"
+ "+\tif (unlikely(flnode == NULL))\n"
+ "+\t\tBUG();  /* FIXME handle more gracefully, but how??? */\n"
+ "+\tkp->flnode = flnode;\n"
+ "+\treturn 0;\n"
+ "+}\n"
+ "+\n"
+ " static void *zcache_get_free_page(void)\n"
+ " {\n"
+ " \tstruct zcache_preload *kp;\n"
+ "@@ -1131,6 +1676,30 @@ static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)\n"
+ " \tkmem_cache_free(zcache_obj_cache, obj);\n"
+ " }\n"
+ " \n"
+ "+/*\n"
+ "+ * Hand out the flushlist_node preloaded for this cpu and clear the\n"
+ "+ * preload slot.  BUG()s if nothing was preloaded.  Also bumps\n"
+ "+ * ramster_curr_flnode_count and tracks its high-water mark.\n"
+ "+ * The pool argument is unused.\n"
+ "+ */\n"
+ "+static struct flushlist_node *ramster_flnode_alloc(struct tmem_pool *pool)\n"
+ "+{\n"
+ "+\tstruct zcache_preload *preload = &__get_cpu_var(zcache_preloads);\n"
+ "+\tstruct flushlist_node *node = preload->flnode;\n"
+ "+\tint in_use;\n"
+ "+\n"
+ "+\tBUG_ON(node == NULL);\n"
+ "+\tpreload->flnode = NULL;\n"
+ "+\tin_use = atomic_inc_return(&ramster_curr_flnode_count);\n"
+ "+\tif (ramster_curr_flnode_count_max < in_use)\n"
+ "+\t\tramster_curr_flnode_count_max = in_use;\n"
+ "+\treturn node;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Return flnode to ramster_flnode_cache and drop the in-use count,\n"
+ "+ * BUG()ing if the count goes negative.  The pool argument is unused\n"
+ "+ * (callers in this file pass NULL).\n"
+ "+ */\n"
+ "+static void ramster_flnode_free(struct flushlist_node *flnode,\n"
+ "+\t\t\t\tstruct tmem_pool *pool)\n"
+ "+{\n"
+ "+\tatomic_dec(&ramster_curr_flnode_count);\n"
+ "+\tBUG_ON(atomic_read(&ramster_curr_flnode_count) < 0);\n"
+ "+\tkmem_cache_free(ramster_flnode_cache, flnode);\n"
+ "+}\n"
+ "+\n"
+ " static struct tmem_hostops zcache_hostops = {\n"
+ " \t.obj_alloc = zcache_obj_alloc,\n"
+ " \t.obj_free = zcache_obj_free,\n"
+ "@@ -1150,22 +1719,20 @@ static unsigned long zcache_curr_pers_pampd_count_max;\n"
+ " /* forward reference */\n"
+ " static int zcache_compress(struct page *from, void **out_va, size_t *out_len);\n"
+ " \n"
+ "-static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,\n"
+ "+static int zcache_pampd_eph_create(char *data, size_t size, bool raw,\n"
+ " \t\t\t\tstruct tmem_pool *pool, struct tmem_oid *oid,\n"
+ "-\t\t\t\t uint32_t index)\n"
+ "+\t\t\t\tuint32_t index, void **pampd)\n"
+ " {\n"
+ "-\tvoid *pampd = NULL, *cdata;\n"
+ "-\tsize_t clen;\n"
+ "-\tint ret;\n"
+ "-\tunsigned long count;\n"
+ "-\tstruct page *page = (struct page *)(data);\n"
+ "+\tint ret = -1;\n"
+ "+\tvoid *cdata = data;\n"
+ "+\tsize_t clen = size;\n"
+ " \tstruct zcache_client *cli = pool->client;\n"
+ " \tuint16_t client_id = get_client_id_from_client(cli);\n"
+ "-\tunsigned long zv_mean_zsize;\n"
+ "-\tunsigned long curr_pers_pampd_count;\n"
+ "-\tu64 total_zsize;\n"
+ "+\tstruct page *page = NULL;\n"
+ "+\tunsigned long count;\n"
+ " \n"
+ "-\tif (eph) {\n"
+ "+\tif (!raw) {\n"
+ "+\t\tpage = virt_to_page(data);\n"
+ " \t\tret = zcache_compress(page, &cdata, &clen);\n"
+ " \t\tif (ret == 0)\n"
+ " \t\t\tgoto out;\n"
+ "@@ -1173,46 +1740,137 @@ static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,\n"
+ " \t\t\tzcache_compress_poor++;\n"
+ " \t\t\tgoto out;\n"
+ " \t\t}\n"
+ "-\t\tpampd = (void *)zbud_create(client_id, pool->pool_id, oid,\n"
+ "-\t\t\t\t\t\tindex, page, cdata, clen);\n"
+ "-\t\tif (pampd != NULL) {\n"
+ "-\t\t\tcount = atomic_inc_return(&zcache_curr_eph_pampd_count);\n"
+ "-\t\t\tif (count > zcache_curr_eph_pampd_count_max)\n"
+ "-\t\t\t\tzcache_curr_eph_pampd_count_max = count;\n"
+ "-\t\t}\n"
+ "-\t} else {\n"
+ "-\t\tcurr_pers_pampd_count =\n"
+ "-\t\t\tatomic_read(&zcache_curr_pers_pampd_count);\n"
+ "-\t\tif (curr_pers_pampd_count >\n"
+ "-\t\t    (zv_page_count_policy_percent * totalram_pages) / 100)\n"
+ "-\t\t\tgoto out;\n"
+ "-\t\tret = zcache_compress(page, &cdata, &clen);\n"
+ "-\t\tif (ret == 0)\n"
+ "-\t\t\tgoto out;\n"
+ "-\t\t/* reject if compression is too poor */\n"
+ "-\t\tif (clen > zv_max_zsize) {\n"
+ "-\t\t\tzcache_compress_poor++;\n"
+ "+\t}\n"
+ "+\t*pampd = (void *)zbud_create(client_id, pool->pool_id, oid,\n"
+ "+\t\t\t\t\tindex, page, cdata, clen);\n"
+ "+\tif (*pampd == NULL) {\n"
+ "+\t\tret = -ENOMEM;\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+\tret = 0;\n"
+ "+\tcount = atomic_inc_return(&zcache_curr_eph_pampd_count);\n"
+ "+\tif (count > zcache_curr_eph_pampd_count_max)\n"
+ "+\t\tzcache_curr_eph_pampd_count_max = count;\n"
+ "+\tif (client_id != LOCAL_CLIENT) {\n"
+ "+\t\tcount = atomic_inc_return(&ramster_foreign_eph_pampd_count);\n"
+ "+\t\tif (count > ramster_foreign_eph_pampd_count_max)\n"
+ "+\t\t\tramster_foreign_eph_pampd_count_max = count;\n"
+ "+\t}\n"
+ "+out:\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+static int zcache_pampd_pers_create(char *data, size_t size, bool raw,\n"
+ "+\t\t\t\tstruct tmem_pool *pool, struct tmem_oid *oid,\n"
+ "+\t\t\t\tuint32_t index, void **pampd)\n"
+ "+{\n"
+ "+\tint ret = -1;\n"
+ "+\tvoid *cdata = data;\n"
+ "+\tsize_t clen = size;\n"
+ "+\tstruct zcache_client *cli = pool->client;\n"
+ "+\tstruct page *page;\n"
+ "+\tunsigned long count;\n"
+ "+\tunsigned long zv_mean_zsize;\n"
+ "+\tstruct zv_hdr *zv;\n"
+ "+\tlong curr_pers_pampd_count;\n"
+ "+\tu64 total_zsize;\n"
+ "+\n"
+ "+\tcurr_pers_pampd_count = atomic_read(&zcache_curr_pers_pampd_count) -\n"
+ "+\t\t\tatomic_read(&ramster_remote_pers_pages);\n"
+ "+\t/* should always be positive, but warn if accounting is off */\n"
+ "+\tWARN_ON_ONCE(curr_pers_pampd_count < 0);\n"
+ "+\tif (curr_pers_pampd_count >\n"
+ "+\t\t    (zv_page_count_policy_percent * totalram_pages) / 100) {\n"
+ "+\t\tzcache_policy_percent_exceeded++;\n"
+ "+#if 0\n"
+ "+{\n"
+ "+static unsigned long cnt;\n"
+ "+cnt++;\n"
+ "+if (!(cnt&(cnt-1)))\n"
+ "+pr_err(\"TESTING zppc policy cnt=%lu, curr=%lu, limit=%lu, totalram=%lu\\n\",\n"
+ "+cnt, curr_pers_pampd_count,\n"
+ "+((zv_page_count_policy_percent * totalram_pages) / 100), totalram_pages);\n"
+ "+}\n"
+ "+#endif\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+\tif (raw)\n"
+ "+\t\tgoto ok_to_create;\n"
+ "+\tpage = virt_to_page(data);\n"
+ "+\tif (zcache_compress(page, &cdata, &clen) == 0)\n"
+ "+\t\tgoto out;\n"
+ "+\t/* reject if compression is too poor */\n"
+ "+\tif (clen > zv_max_zsize) {\n"
+ "+\t\tzcache_compress_poor++;\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+\t/* reject if mean compression is too poor */\n"
+ "+\tif ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) {\n"
+ "+\t\ttotal_zsize = xv_get_total_size_bytes(cli->xvpool);\n"
+ "+\t\tzv_mean_zsize = div_u64(total_zsize, curr_pers_pampd_count);\n"
+ "+\t\tif (zv_mean_zsize > zv_max_mean_zsize) {\n"
+ "+\t\t\tzcache_mean_compress_poor++;\n"
+ " \t\t\tgoto out;\n"
+ " \t\t}\n"
+ "-\t\t/* reject if mean compression is too poor */\n"
+ "-\t\tif ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) {\n"
+ "-\t\t\ttotal_zsize = xv_get_total_size_bytes(cli->xvpool);\n"
+ "-\t\t\tzv_mean_zsize = div_u64(total_zsize,\n"
+ "-\t\t\t\t\t\tcurr_pers_pampd_count);\n"
+ "-\t\t\tif (zv_mean_zsize > zv_max_mean_zsize) {\n"
+ "-\t\t\t\tzcache_mean_compress_poor++;\n"
+ "-\t\t\t\tgoto out;\n"
+ "-\t\t\t}\n"
+ "-\t\t}\n"
+ "-\t\tpampd = (void *)zv_create(cli->xvpool, pool->pool_id,\n"
+ "-\t\t\t\t\t\toid, index, cdata, clen);\n"
+ "-\t\tif (pampd == NULL)\n"
+ "-\t\t\tgoto out;\n"
+ "-\t\tcount = atomic_inc_return(&zcache_curr_pers_pampd_count);\n"
+ "-\t\tif (count > zcache_curr_pers_pampd_count_max)\n"
+ "-\t\t\tzcache_curr_pers_pampd_count_max = count;\n"
+ " \t}\n"
+ "+ok_to_create:\n"
+ "+\t*pampd = (void *)zv_create(cli, pool->pool_id, oid, index, cdata, clen);\n"
+ "+\tif (*pampd == NULL) {\n"
+ "+\t\tret = -ENOMEM;\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+\tret = 0;\n"
+ "+\tcount = atomic_inc_return(&zcache_curr_pers_pampd_count);\n"
+ "+\tif (count > zcache_curr_pers_pampd_count_max)\n"
+ "+\t\tzcache_curr_pers_pampd_count_max = count;\n"
+ "+\tif (is_local_client(cli))\n"
+ "+\t\tgoto out;\n"
+ "+\tzv = *(struct zv_hdr **)pampd;\n"
+ "+\tcount = atomic_inc_return(&ramster_foreign_pers_pampd_count);\n"
+ "+\tif (count > ramster_foreign_pers_pampd_count_max)\n"
+ "+\t\tramster_foreign_pers_pampd_count_max = count;\n"
+ " out:\n"
+ "+#if 0\n"
+ "+if (ret == -ENOMEM) {\n"
+ "+static unsigned long cnt, lclcnt, fgncnt;\n"
+ "+cnt++;\n"
+ "+if (is_local_client(pool->client))\n"
+ "+ lclcnt++;\n"
+ "+else\n"
+ "+ fgncnt++;\n"
+ "+if (!(cnt&(cnt-1)))\n"
+ "+pr_err(\"TESTING zcache_pampd_create_PERS ENOMEM cnt=%lu, local=%lu, foreign=%lu, tot pampd_count=%lu, remote=%lu\\n\", cnt, lclcnt, fgncnt, (long)atomic_read(&zcache_curr_pers_pampd_count), (long)atomic_read(&ramster_remote_pers_pages));\n"
+ "+} else if (ret < 0) {\n"
+ "+static unsigned long cnt, lclcnt, fgncnt;\n"
+ "+cnt++;\n"
+ "+if (is_local_client(pool->client))\n"
+ "+ lclcnt++;\n"
+ "+else\n"
+ "+ fgncnt++;\n"
+ "+if (!(cnt&(cnt-1)))\n"
+ "+pr_err(\"TESTING zcache_pampd_create_PERS POLICYFAIL cnt=%lu, local=%lu, foreign=%lu, tot pampd_count=%lu, remote=%lu\\n\", cnt, lclcnt, fgncnt, (long)atomic_read(&zcache_curr_pers_pampd_count), (long)atomic_read(&ramster_remote_pers_pages));\n"
+ "+}\n"
+ "+#endif\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Create a pampd for the given data, dispatching to the ephemeral or\n"
+ "+ * persistent creator.  eph == 1 forces ephemeral, eph == 0 follows the\n"
+ "+ * pool's own type, and any other value selects persistent.\n"
+ "+ * Must be called with preemption disabled.\n"
+ "+ * Returns the new pampd, or NULL if the selected creator did not\n"
+ "+ * produce one.\n"
+ "+ */\n"
+ "+static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,\n"
+ "+\t\t\t\tstruct tmem_pool *pool, struct tmem_oid *oid,\n"
+ "+\t\t\t\tuint32_t index)\n"
+ "+{\n"
+ "+\tvoid *pampd = NULL;\n"
+ "+\tint ret;\n"
+ "+\tbool ephemeral;\n"
+ "+\n"
+ "+\tBUG_ON(preemptible());\n"
+ "+\tephemeral = (eph == 1) || ((eph == 0) && is_ephemeral(pool));\n"
+ "+\tif (ephemeral)\n"
+ "+\t\tret = zcache_pampd_eph_create(data, size, raw, pool,\n"
+ "+\t\t\t\t\t\toid, index, &pampd);\n"
+ "+\telse\n"
+ "+\t\tret = zcache_pampd_pers_create(data, size, raw, pool,\n"
+ "+\t\t\t\t\t\toid, index, &pampd);\n"
+ "+\t/* ret is currently unused; failure is reported via a NULL pampd */\n"
+ "+\t/* FIXME add some counters here for failed creates? */\n"
+ " \treturn pampd;\n"
+ " }\n"
+ " \n"
+ "@@ -1226,75 +1884,368 @@ static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw,\n"
+ " {\n"
+ " \tint ret = 0;\n"
+ " \n"
+ "-\tBUG_ON(is_ephemeral(pool));\n"
+ "-\tzv_decompress((struct page *)(data), pampd);\n"
+ "+\tBUG_ON(preemptible());\n"
+ "+\tBUG_ON(is_ephemeral(pool)); /* Fix later for shared pools? */\n"
+ "+\tBUG_ON(pampd_is_remote(pampd));\n"
+ "+\tif (raw)\n"
+ "+\t\tzv_copy_from_pampd(data, bufsize, pampd);\n"
+ "+\telse\n"
+ "+\t\tzv_decompress(virt_to_page(data), pampd);\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "-/*\n"
+ "- * fill the pageframe corresponding to the struct page with the data\n"
+ "- * from the passed pampd\n"
+ "- */\n"
+ " static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw,\n"
+ " \t\t\t\t\tvoid *pampd, struct tmem_pool *pool,\n"
+ " \t\t\t\t\tstruct tmem_oid *oid, uint32_t index)\n"
+ " {\n"
+ " \tint ret = 0;\n"
+ "+\tunsigned long flags;\n"
+ "+\tstruct zcache_client *cli = pool->client;\n"
+ " \n"
+ "-\tBUG_ON(!is_ephemeral(pool));\n"
+ "-\tzbud_decompress((struct page *)(data), pampd);\n"
+ "-\tzbud_free_and_delist((struct zbud_hdr *)pampd);\n"
+ "-\tatomic_dec(&zcache_curr_eph_pampd_count);\n"
+ "+\tBUG_ON(preemptible());\n"
+ "+\tBUG_ON(pampd_is_remote(pampd));\n"
+ "+\tif (is_ephemeral(pool)) {\n"
+ "+\t\tlocal_irq_save(flags);\n"
+ "+\t\tif (raw)\n"
+ "+\t\t\tzbud_copy_from_pampd(data, bufsize, pampd);\n"
+ "+\t\telse\n"
+ "+\t\t\tret = zbud_decompress(virt_to_page(data), pampd);\n"
+ "+\t\tzbud_free_and_delist((struct zbud_hdr *)pampd);\n"
+ "+\t\tlocal_irq_restore(flags);\n"
+ "+\t\tif (!is_local_client(cli)) {\n"
+ "+\t\t\tatomic_dec(&ramster_foreign_eph_pampd_count);\n"
+ "+\t\t\tWARN_ON_ONCE(atomic_read(&ramster_foreign_eph_pampd_count) < 0);\n"
+ "+\t\t}\n"
+ "+\t\tatomic_dec(&zcache_curr_eph_pampd_count);\n"
+ "+\t\tWARN_ON_ONCE(atomic_read(&zcache_curr_eph_pampd_count) < 0);\n"
+ "+\t} else {\n"
+ "+\t\tif (is_local_client(cli))\n"
+ "+\t\t\tBUG();\n"
+ "+\t\tif (raw)\n"
+ "+\t\t\tzv_copy_from_pampd(data, bufsize, pampd);\n"
+ "+\t\telse\n"
+ "+\t\t\tzv_decompress(virt_to_page(data), pampd);\n"
+ "+\t\tzv_free(cli->xvpool, pampd);\n"
+ "+\t\tif (!is_local_client(cli)) {\n"
+ "+\t\t\tatomic_dec(&ramster_foreign_pers_pampd_count);\n"
+ "+\t\t\tWARN_ON_ONCE(atomic_read(&ramster_foreign_pers_pampd_count) < 0);\n"
+ "+\t\t}\n"
+ "+\t\tatomic_dec(&zcache_curr_pers_pampd_count);\n"
+ "+\t\tWARN_ON_ONCE(atomic_read(&zcache_curr_pers_pampd_count) < 0);\n"
+ "+\t\tret = 0;\n"
+ "+\t}\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "+/* function form of pampd_is_remote(): true if pampd describes a remote page */\n"
+ "+static bool zcache_pampd_is_remote(void *pampd)\n"
+ "+{\n"
+ "+\treturn pampd_is_remote(pampd);\n"
+ "+}\n"
+ "+\n"
+ " /*\n"
+ "  * free the pampd and remove it from any zcache lists\n"
+ "  * pampd must no longer be pointed to from any tmem data structures!\n"
+ "  */\n"
+ " static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,\n"
+ "-\t\t\t\tstruct tmem_oid *oid, uint32_t index)\n"
+ "+\t\t\t      struct tmem_oid *oid, uint32_t index, bool acct)\n"
+ " {\n"
+ " \tstruct zcache_client *cli = pool->client;\n"
+ "-\n"
+ "-\tif (is_ephemeral(pool)) {\n"
+ "+\tbool eph = is_ephemeral(pool);\n"
+ "+\tstruct zv_hdr *zv;\n"
+ "+\n"
+ "+\tBUG_ON(preemptible());\n"
+ "+\tif (pampd_is_remote(pampd)) {\n"
+ "+\t\tWARN_ON(acct == false);\n"
+ "+\t\tif (oid == NULL) {\n"
+ "+\t\t\t/*\n"
+ "+\t\t\t * a NULL oid means to ignore this pampd free\n"
+ "+\t\t\t * as the remote freeing will be handled elsewhere\n"
+ "+\t\t\t */\n"
+ "+\t\t} else if (eph) {\n"
+ "+\t\t\t/* FIXME remote flush optional but probably good idea */\n"
+ "+\t\t\t/* FIXME get these working properly again */\n"
+ "+\t\t\tatomic_dec(&zcache_curr_eph_pampd_count);\n"
+ "+\t\t\tWARN_ON_ONCE(atomic_read(&zcache_curr_eph_pampd_count) < 0);\n"
+ "+\t\t} else if (pampd_is_intransit(pampd)) {\n"
+ "+\t\t\t/* did a pers remote get_and_free, so just free local */\n"
+ "+\t\t\tpampd = pampd_mask_intransit_and_remote(pampd);\n"
+ "+\t\t\tgoto local_pers;\n"
+ "+\t\t} else {\n"
+ "+\t\t\tstruct flushlist_node *flnode =\n"
+ "+\t\t\t\tramster_flnode_alloc(pool);\n"
+ "+\n"
+ "+\t\t\tflnode->xh.client_id = pampd_remote_node(pampd);\n"
+ "+\t\t\tflnode->xh.pool_id = pool->pool_id;\n"
+ "+\t\t\tflnode->xh.oid = *oid;\n"
+ "+\t\t\tflnode->xh.index = index;\n"
+ "+\t\t\tflnode->rem_op.op = RAMSTER_REMOTIFY_FLUSH_PAGE;\n"
+ "+\t\t\tspin_lock(&zcache_rem_op_list_lock);\n"
+ "+\t\t\tlist_add(&flnode->rem_op.list, &zcache_rem_op_list);\n"
+ "+\t\t\tspin_unlock(&zcache_rem_op_list_lock);\n"
+ "+\t\t\tatomic_dec(&zcache_curr_pers_pampd_count);\n"
+ "+\t\t\tWARN_ON_ONCE(atomic_read(&zcache_curr_pers_pampd_count) < 0);\n"
+ "+\t\t\tatomic_dec(&ramster_remote_pers_pages);\n"
+ "+\t\t\tWARN_ON_ONCE(atomic_read(&ramster_remote_pers_pages) < 0);\n"
+ "+\t\t}\n"
+ "+\t} else if (eph) {\n"
+ " \t\tzbud_free_and_delist((struct zbud_hdr *)pampd);\n"
+ "-\t\tatomic_dec(&zcache_curr_eph_pampd_count);\n"
+ "-\t\tBUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0);\n"
+ "+\t\tif (!is_local_client(pool->client)) {\n"
+ "+\t\t\tatomic_dec(&ramster_foreign_eph_pampd_count);\n"
+ "+\t\t\tWARN_ON_ONCE(atomic_read(&ramster_foreign_eph_pampd_count) < 0);\n"
+ "+\t\t}\n"
+ "+\t\tif (acct)\n"
+ "+\t\t\tatomic_dec(&zcache_curr_eph_pampd_count);\n"
+ "+\t\t\t/* FIXME get these working properly again */\n"
+ "+\t\t\tWARN_ON_ONCE(atomic_read(&zcache_curr_eph_pampd_count) < 0);\n"
+ " \t} else {\n"
+ "-\t\tzv_free(cli->xvpool, (struct zv_hdr *)pampd);\n"
+ "-\t\tatomic_dec(&zcache_curr_pers_pampd_count);\n"
+ "-\t\tBUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0);\n"
+ "+local_pers:\n"
+ "+\t\tzv = (struct zv_hdr *)pampd;\n"
+ "+\t\tif (!is_local_client(pool->client)) {\n"
+ "+\t\t\tatomic_dec(&ramster_foreign_pers_pampd_count);\n"
+ "+\t\t\tWARN_ON_ONCE(atomic_read(&ramster_foreign_pers_pampd_count) < 0);\n"
+ "+\t\t}\n"
+ "+\t\tzv_free(cli->xvpool, zv);\n"
+ "+\t\tif (acct)\n"
+ "+\t\t\tatomic_dec(&zcache_curr_pers_pampd_count);\n"
+ "+\t\t/* FIXME get these working properly again */\n"
+ "+\t\tWARN_ON_ONCE(atomic_read(&zcache_curr_pers_pampd_count) < 0);\n"
+ " \t}\n"
+ " }\n"
+ " \n"
+ "-static void zcache_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj)\n"
+ "+static void zcache_pampd_free_obj(struct tmem_pool *pool,\n"
+ "+\t\t\t\t\tstruct tmem_obj *obj)\n"
+ " {\n"
+ "+\tstruct flushlist_node *flnode;\n"
+ "+\n"
+ "+\tBUG_ON(preemptible());\n"
+ "+\tif (obj->extra == NULL)\n"
+ "+\t\treturn;\n"
+ "+\tBUG_ON(!pampd_is_remote(obj->extra));\n"
+ "+\tflnode = ramster_flnode_alloc(pool);\n"
+ "+\tflnode->xh.client_id = pampd_remote_node(obj->extra);\n"
+ "+\tflnode->xh.pool_id = pool->pool_id;\n"
+ "+\tflnode->xh.oid = obj->oid;\n"
+ "+\tflnode->xh.index = FLUSH_ENTIRE_OBJECT;\n"
+ "+\tflnode->rem_op.op = RAMSTER_REMOTIFY_FLUSH_OBJ;\n"
+ "+\tspin_lock(&zcache_rem_op_list_lock);\n"
+ "+\tlist_add(&flnode->rem_op.list, &zcache_rem_op_list);\n"
+ "+\tspin_unlock(&zcache_rem_op_list_lock);\n"
+ " }\n"
+ " \n"
+ "-static void zcache_pampd_new_obj(struct tmem_obj *obj)\n"
+ "+void zcache_pampd_new_obj(struct tmem_obj *obj)\n"
+ " {\n"
+ "+\tobj->extra = NULL;\n"
+ " }\n"
+ " \n"
+ "-static int zcache_pampd_replace_in_obj(void *pampd, struct tmem_obj *obj)\n"
+ "+int zcache_pampd_replace_in_obj(void *new_pampd, struct tmem_obj *obj)\n"
+ " {\n"
+ "-\treturn -1;\n"
+ "+\tint ret = -1;\n"
+ "+\n"
+ "+\tif (new_pampd != NULL) {\n"
+ "+\t\tif (obj->extra == NULL)\n"
+ "+\t\t\tobj->extra = new_pampd;\n"
+ "+\t\t/* enforce that all remote pages in an object reside\n"
+ "+\t\t * in the same node! */\n"
+ "+\t\telse if (pampd_remote_node(new_pampd) !=\n"
+ "+\t\t\t\tpampd_remote_node((void *)(obj->extra)))\n"
+ "+\t\t\tBUG();\n"
+ "+\t\tret = 0;\n"
+ "+\t}\n"
+ "+\treturn ret;\n"
+ " }\n"
+ " \n"
+ "-static bool zcache_pampd_is_remote(void *pampd)\n"
+ "+/*\n"
+ "+ * Called by the message handler after a (still compressed) page has been\n"
+ "+ * fetched from the remote machine in response to an \"is_remote\" tmem_get\n"
+ "+ * or persistent tmem_localify.  For a tmem_get, \"extra\" is the address of\n"
+ "+ * the page that is to be filled to succesfully resolve the tmem_get; for\n"
+ "+ * a (persistent) tmem_localify, \"extra\" is NULL (as the data is placed only\n"
+ "+ * in the local zcache).  \"data\" points to \"size\" bytes of (compressed) data\n"
+ "+ * passed in the message.  In the case of a persistent remote get, if\n"
+ "+ * pre-allocation was successful (see zcache_repatriate_preload), the page\n"
+ "+ * is placed into both local zcache and at \"extra\".\n"
+ "+ */\n"
+ "+int zcache_localify(int pool_id, struct tmem_oid *oidp,\n"
+ "+\t\t\tuint32_t index, char *data, size_t size,\n"
+ "+\t\t\tvoid *extra)\n"
+ " {\n"
+ "-\treturn 0;\n"
+ "+\tint ret = -ENOENT;\n"
+ "+\tunsigned long flags;\n"
+ "+\tstruct tmem_pool *pool;\n"
+ "+\tbool ephemeral, delete = false;\n"
+ "+\tsize_t clen = PAGE_SIZE;\n"
+ "+\tvoid *pampd, *saved_hb;\n"
+ "+\tstruct tmem_obj *obj;\n"
+ "+\n"
+ "+\tpool = zcache_get_pool_by_id(LOCAL_CLIENT, pool_id);\n"
+ "+\tif (unlikely(pool == NULL))\n"
+ "+\t\t/* pool doesn't exist anymore */\n"
+ "+\t\tgoto out;\n"
+ "+\tephemeral = is_ephemeral(pool);\n"
+ "+\tlocal_irq_save(flags);  /* FIXME: maybe only disable softirqs? */\n"
+ "+\tpampd = tmem_localify_get_pampd(pool, oidp, index, &obj, &saved_hb);\n"
+ "+\tif (pampd == NULL) {\n"
+ "+\t\t/* hmmm... must have been a flush while waiting */\n"
+ "+#if 1\n"
+ "+\t\tpr_err(\"UNTESTED pampd==NULL in zcache_localify\\n\");\n"
+ "+#endif\n"
+ "+\t\tif (ephemeral)\n"
+ "+\t\t\tramster_remote_eph_pages_unsucc_get++;\n"
+ "+\t\telse\n"
+ "+\t\t\tramster_remote_pers_pages_unsucc_get++;\n"
+ "+\t\tobj = NULL;\n"
+ "+\t\tgoto finish;\n"
+ "+\t} else if (unlikely(!pampd_is_remote(pampd))) {\n"
+ "+\t\t/* hmmm... must have been a dup put while waiting */\n"
+ "+#if 1\n"
+ "+\t\tpr_err(\"UNTESTED dup while waiting in zcache_localify\\n\");\n"
+ "+#endif\n"
+ "+\t\tif (ephemeral)\n"
+ "+\t\t\tramster_remote_eph_pages_unsucc_get++;\n"
+ "+\t\telse\n"
+ "+\t\t\tramster_remote_pers_pages_unsucc_get++;\n"
+ "+\t\tobj = NULL;\n"
+ "+\t\tpampd = NULL;\n"
+ "+\t\tret = -EEXIST;\n"
+ "+\t\tgoto finish;\n"
+ "+\t} else if (size == 0) {\n"
+ "+\t\t/* no remote data, delete the local is_remote pampd */\n"
+ "+\t\tpampd = NULL;\n"
+ "+\t\tif (ephemeral)\n"
+ "+\t\t\tramster_remote_eph_pages_unsucc_get++;\n"
+ "+\t\telse\n"
+ "+\t\t\tBUG();\n"
+ "+\t\tdelete = true;\n"
+ "+\t\tgoto finish;\n"
+ "+\t}\n"
+ "+\tif (!ephemeral && pampd_is_intransit(pampd)) {\n"
+ "+\t\t/* localify to zcache */\n"
+ "+\t\tpampd = pampd_mask_intransit_and_remote(pampd);\n"
+ "+\t\tzv_copy_to_pampd(pampd, data, size);\n"
+ "+\t} else {\n"
+ "+\t\tpampd = NULL;\n"
+ "+\t\tobj = NULL;\n"
+ "+\t}\n"
+ "+\tif (extra != NULL) {\n"
+ "+\t\t/* decompress direct-to-memory to complete remotify */\n"
+ "+\t\tret = lzo1x_decompress_safe((char *)data, size,\n"
+ "+\t\t\t\t\t\t(char *)extra, &clen);\n"
+ "+\t\tBUG_ON(ret != LZO_E_OK);\n"
+ "+\t\tBUG_ON(clen != PAGE_SIZE);\n"
+ "+\t}\n"
+ "+\tif (ephemeral)\n"
+ "+\t\tramster_remote_eph_pages_succ_get++;\n"
+ "+\telse\n"
+ "+\t\tramster_remote_pers_pages_succ_get++;\n"
+ "+\tret = 0;\n"
+ "+finish:\n"
+ "+\ttmem_localify_finish(obj, index, pampd, saved_hb, delete);\n"
+ "+\tzcache_put_pool(pool);\n"
+ "+\tlocal_irq_restore(flags);\n"
+ "+out:\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Called on a remote persistent tmem_get to attempt to preallocate\n"
+ "+ * local storage for the data contained in the remote persistent page.\n"
+ "+ * If succesfully preallocated, returns the pampd, marked as remote and\n"
+ "+ * in_transit.  Else returns NULL.  Note that the appropriate tmem data\n"
+ "+ * structure must be locked.\n"
+ "+ */\n"
+ "+static void *zcache_pampd_repatriate_preload(void *pampd,\n"
+ "+\t\t\t\t\t\tstruct tmem_pool *pool,\n"
+ "+\t\t\t\t\t\tstruct tmem_oid *oid,\n"
+ "+\t\t\t\t\t\tuint32_t index,\n"
+ "+\t\t\t\t\t\tbool *intransit)\n"
+ "+{\n"
+ "+\tint clen = pampd_remote_size(pampd);\n"
+ "+\tvoid *ret_pampd = NULL;\n"
+ "+\tunsigned long flags;\n"
+ "+\n"
+ "+\tif (!pampd_is_remote(pampd))\n"
+ "+\t\tBUG();\n"
+ "+\tif (is_ephemeral(pool))\n"
+ "+\t\tBUG();\n"
+ "+\tif (pampd_is_intransit(pampd)) {\n"
+ "+\t\t/*\n"
+ "+\t\t * to avoid multiple allocations (and maybe a memory leak)\n"
+ "+\t\t * don't preallocate if already in the process of being\n"
+ "+\t\t * repatriated\n"
+ "+\t\t */\n"
+ "+\t\t*intransit = true;\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+#if 0\n"
+ "+{\n"
+ "+static unsigned long cnt;\n"
+ "+cnt++;\n"
+ "+if (!(cnt&(cnt-1)))\n"
+ "+pr_err(\"TESTING zcache_pampd_repat_preload, size=%d, cksum=??, cnt=%lu\\n\",\n"
+ "+clen, cnt);\n"
+ "+}\n"
+ "+#endif\n"
+ "+\t*intransit = false;\n"
+ "+\tlocal_irq_save(flags);\n"
+ "+\tret_pampd = (void *)zv_alloc(pool, oid, index, clen);\n"
+ "+\tif (ret_pampd != NULL) {\n"
+ "+\t\t/*\n"
+ "+\t\t *  a pampd is marked intransit if it is remote and space has\n"
+ "+\t\t *  been allocated for it locally (note, only happens for\n"
+ "+\t\t *  persistent pages, in which case the remote copy is freed)\n"
+ "+\t\t */\n"
+ "+\t\tret_pampd = pampd_mark_intransit(ret_pampd);\n"
+ "+\t\tatomic_dec(&ramster_remote_pers_pages);\n"
+ "+\t\tWARN_ON_ONCE(atomic_read(&ramster_remote_pers_pages) < 0);\n"
+ "+\t} else\n"
+ "+\t\tramster_pers_pages_remote_nomem++;\n"
+ "+\tlocal_irq_restore(flags);\n"
+ "+out:\n"
+ "+\treturn ret_pampd;\n"
+ "+}\n"
+ "+\n"
+ "+/*\n"
+ "+ * Called on a remote tmem_get to invoke a message to fetch the page.\n"
+ "+ * Might sleep so no tmem locks can be held.  \"extra\" is passed\n"
+ "+ * all the way through the round-trip messaging to zcache_localify.\n"
+ "+ */\n"
+ "+static int zcache_pampd_repatriate(void *fake_pampd, void *real_pampd,\n"
+ "+\t\t\t\t   struct tmem_pool *pool,\n"
+ "+\t\t\t\t   struct tmem_oid *oid, uint32_t index,\n"
+ "+\t\t\t\t   bool free, void *extra)\n"
+ "+{\n"
+ "+\tstruct tmem_xhandle xh;\n"
+ "+\tint ret;\n"
+ "+\n"
+ "+\tif (pampd_is_intransit(real_pampd))\n"
+ "+\t\t/* have local space pre-reserved, so free remote copy */\n"
+ "+\t\tfree = true;\n"
+ "+\txh = tmem_xhandle_fill(LOCAL_CLIENT, pool, oid, index);\n"
+ "+\t/* unreliable request/response for now */\n"
+ "+\tret = ramster_remote_async_get(&xh, free,\n"
+ "+\t\t\t\t\tpampd_remote_node(fake_pampd),\n"
+ "+\t\t\t\t\tpampd_remote_size(fake_pampd),\n"
+ "+\t\t\t\t\tpampd_remote_cksum(fake_pampd),\n"
+ "+\t\t\t\t\textra);\n"
+ "+#if 1\n"
+ "+\tif (ret != 0 && ret != -ENOENT)\n"
+ "+\t\tpr_err(\"TESTING zcache_pampd_repatriate returns, ret=%d\\n\",\n"
+ "+\t\t\tret);\n"
+ "+#endif\n"
+ "+\treturn ret;\n"
+ " }\n"
+ " \n"
+ " static struct tmem_pamops zcache_pamops = {\n"
+ " \t.create = zcache_pampd_create,\n"
+ " \t.get_data = zcache_pampd_get_data,\n"
+ "-\t.get_data_and_free = zcache_pampd_get_data_and_free,\n"
+ " \t.free = zcache_pampd_free,\n"
+ "+\t.get_data_and_free = zcache_pampd_get_data_and_free,\n"
+ " \t.free_obj = zcache_pampd_free_obj,\n"
+ "+\t.is_remote = zcache_pampd_is_remote,\n"
+ "+\t.repatriate_preload = zcache_pampd_repatriate_preload,\n"
+ "+\t.repatriate = zcache_pampd_repatriate,\n"
+ " \t.new_obj = zcache_pampd_new_obj,\n"
+ " \t.replace_in_obj = zcache_pampd_replace_in_obj,\n"
+ "-\t.is_remote = zcache_pampd_is_remote,\n"
+ " };\n"
+ " \n"
+ " /*\n"
+ "@@ -1342,9 +2293,13 @@ static int zcache_cpu_notifier(struct notifier_block *nb,\n"
+ " \t\tper_cpu(zcache_workmem, cpu) =\n"
+ " \t\t\tkzalloc(LZO1X_MEM_COMPRESS,\n"
+ " \t\t\t\tGFP_KERNEL | __GFP_REPEAT);\n"
+ "+\t\tper_cpu(zcache_remoteputmem, cpu) =\n"
+ "+\t\t\tkzalloc(PAGE_SIZE, GFP_KERNEL | __GFP_REPEAT);\n"
+ " \t\tbreak;\n"
+ " \tcase CPU_DEAD:\n"
+ " \tcase CPU_UP_CANCELED:\n"
+ "+\t\tkfree(per_cpu(zcache_remoteputmem, cpu));\n"
+ "+\t\tper_cpu(zcache_remoteputmem, cpu) = NULL;\n"
+ " \t\tfree_pages((unsigned long)per_cpu(zcache_dstmem, cpu),\n"
+ " \t\t\t\tLZO_DSTMEM_PAGE_ORDER);\n"
+ " \t\tper_cpu(zcache_dstmem, cpu) = NULL;\n"
+ "@@ -1427,6 +2382,7 @@ ZCACHE_SYSFS_RO(aborted_preload);\n"
+ " ZCACHE_SYSFS_RO(aborted_shrink);\n"
+ " ZCACHE_SYSFS_RO(compress_poor);\n"
+ " ZCACHE_SYSFS_RO(mean_compress_poor);\n"
+ "+ZCACHE_SYSFS_RO(policy_percent_exceeded);\n"
+ " ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages);\n"
+ " ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages);\n"
+ " ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count);\n"
+ "@@ -1451,6 +2407,7 @@ static struct attribute *zcache_attrs[] = {\n"
+ " \t&zcache_flobj_found_attr.attr,\n"
+ " \t&zcache_failed_eph_puts_attr.attr,\n"
+ " \t&zcache_failed_pers_puts_attr.attr,\n"
+ "+\t&zcache_policy_percent_exceeded_attr.attr,\n"
+ " \t&zcache_compress_poor_attr.attr,\n"
+ " \t&zcache_mean_compress_poor_attr.attr,\n"
+ " \t&zcache_zbud_curr_raw_pages_attr.attr,\n"
+ "@@ -1483,6 +2440,151 @@ static struct attribute_group zcache_attr_group = {\n"
+ " \t.name = \"zcache\",\n"
+ " };\n"
+ " \n"
+ "+#define RAMSTER_SYSFS_RO(_name) \\\n"
+ "+\tstatic ssize_t ramster_##_name##_show(struct kobject *kobj, \\\n"
+ "+\t\t\t\tstruct kobj_attribute *attr, char *buf) \\\n"
+ "+\t{ \\\n"
+ "+\t\treturn sprintf(buf, \"%lu\\n\", ramster_##_name); \\\n"
+ "+\t} \\\n"
+ "+\tstatic struct kobj_attribute ramster_##_name##_attr = { \\\n"
+ "+\t\t.attr = { .name = __stringify(_name), .mode = 0444 }, \\\n"
+ "+\t\t.show = ramster_##_name##_show, \\\n"
+ "+\t}\n"
+ "+\n"
+ "+#define RAMSTER_SYSFS_RW(_name) \\\n"
+ "+\tstatic ssize_t ramster_##_name##_show(struct kobject *kobj, \\\n"
+ "+\t\t\t\tstruct kobj_attribute *attr, char *buf) \\\n"
+ "+\t{ \\\n"
+ "+\t\treturn sprintf(buf, \"%lu\\n\", ramster_##_name); \\\n"
+ "+\t} \\\n"
+ "+\tstatic ssize_t ramster_##_name##_store(struct kobject *kobj, \\\n"
+ "+\t\tstruct kobj_attribute *attr, const char *buf, size_t count) \\\n"
+ "+\t{ \\\n"
+ "+\t\tint err; \\\n"
+ "+\t\tunsigned long enable; \\\n"
+ "+\t\terr = strict_strtoul(buf, 10, &enable); \\\n"
+ "+\t\tif (err) \\\n"
+ "+\t\t\treturn -EINVAL; \\\n"
+ "+\t\tramster_##_name = enable; \\\n"
+ "+\t\treturn count; \\\n"
+ "+\t} \\\n"
+ "+\tstatic struct kobj_attribute ramster_##_name##_attr = { \\\n"
+ "+\t\t.attr = { .name = __stringify(_name), .mode = 0644 }, \\\n"
+ "+\t\t.show = ramster_##_name##_show, \\\n"
+ "+\t\t.store = ramster_##_name##_store, \\\n"
+ "+\t}\n"
+ "+\n"
+ "+#define RAMSTER_SYSFS_RO_ATOMIC(_name) \\\n"
+ "+\tstatic ssize_t ramster_##_name##_show(struct kobject *kobj, \\\n"
+ "+\t\t\t\tstruct kobj_attribute *attr, char *buf) \\\n"
+ "+\t{ \\\n"
+ "+\t    return sprintf(buf, \"%d\\n\", atomic_read(&ramster_##_name)); \\\n"
+ "+\t} \\\n"
+ "+\tstatic struct kobj_attribute ramster_##_name##_attr = { \\\n"
+ "+\t\t.attr = { .name = __stringify(_name), .mode = 0444 }, \\\n"
+ "+\t\t.show = ramster_##_name##_show, \\\n"
+ "+\t}\n"
+ "+\n"
+ "+RAMSTER_SYSFS_RO_ATOMIC(remote_pers_pages);\n"
+ "+RAMSTER_SYSFS_RW(pers_remotify_enable);\n"
+ "+RAMSTER_SYSFS_RW(eph_remotify_enable);\n"
+ "+RAMSTER_SYSFS_RO(eph_pages_remoted);\n"
+ "+RAMSTER_SYSFS_RO(eph_pages_remote_failed);\n"
+ "+RAMSTER_SYSFS_RO(pers_pages_remoted);\n"
+ "+RAMSTER_SYSFS_RO(pers_pages_remote_failed);\n"
+ "+RAMSTER_SYSFS_RO(pers_pages_remote_nomem);\n"
+ "+RAMSTER_SYSFS_RO(remote_pages_flushed);\n"
+ "+RAMSTER_SYSFS_RO(remote_page_flushes_failed);\n"
+ "+RAMSTER_SYSFS_RO(remote_objects_flushed);\n"
+ "+RAMSTER_SYSFS_RO(remote_object_flushes_failed);\n"
+ "+RAMSTER_SYSFS_RO(remote_eph_pages_succ_get);\n"
+ "+RAMSTER_SYSFS_RO(remote_eph_pages_unsucc_get);\n"
+ "+RAMSTER_SYSFS_RO(remote_pers_pages_succ_get);\n"
+ "+RAMSTER_SYSFS_RO(remote_pers_pages_unsucc_get);\n"
+ "+RAMSTER_SYSFS_RO_ATOMIC(foreign_eph_pampd_count);\n"
+ "+RAMSTER_SYSFS_RO(foreign_eph_pampd_count_max);\n"
+ "+RAMSTER_SYSFS_RO_ATOMIC(foreign_pers_pampd_count);\n"
+ "+RAMSTER_SYSFS_RO(foreign_pers_pampd_count_max);\n"
+ "+RAMSTER_SYSFS_RO_ATOMIC(curr_flnode_count);\n"
+ "+RAMSTER_SYSFS_RO(curr_flnode_count_max);\n"
+ "+\n"
+ "+#define MANUAL_NODES 8\n"
+ "+static bool ramster_nodes_manual_up[MANUAL_NODES];\n"
+ "+static ssize_t ramster_manual_node_up_show(struct kobject *kobj,\n"
+ "+\t\t\t\tstruct kobj_attribute *attr, char *buf)\n"
+ "+{\n"
+ "+\tint i;\n"
+ "+\tchar *p = buf;\n"
+ "+\tfor (i = 0; i < MANUAL_NODES; i++)\n"
+ "+\t\tif (ramster_nodes_manual_up[i])\n"
+ "+\t\t\tp += sprintf(p, \"%d \", i);\n"
+ "+\tp += sprintf(p, \"\\n\");\n"
+ "+\treturn p - buf;\n"
+ "+}\n"
+ "+\n"
+ "+static ssize_t ramster_manual_node_up_store(struct kobject *kobj,\n"
+ "+\t\tstruct kobj_attribute *attr, const char *buf, size_t count)\n"
+ "+{\n"
+ "+\tint err;\n"
+ "+\tunsigned long node_num;\n"
+ "+\textern void o2net_hb_node_up_manual(int);\n"
+ "+\n"
+ "+\terr = strict_strtoul(buf, 10, &node_num);\n"
+ "+\tif (err) {\n"
+ "+\t\tpr_err(\"bad strtoul?\\n\");\n"
+ "+\t\treturn -EINVAL;\n"
+ "+\t}\n"
+ "+\tif (node_num >= MANUAL_NODES) {\n"
+ "+\t\tpr_err(\"bad node_num=%lu?\\n\", node_num);\n"
+ "+\t\treturn -EINVAL;\n"
+ "+\t}\n"
+ "+\tif (ramster_nodes_manual_up[node_num]) {\n"
+ "+\t\tpr_err(\"node %d already up, ignoring\\n\", (int)node_num);\n"
+ "+\t} else {\n"
+ "+\t\tramster_nodes_manual_up[node_num] = true;\n"
+ "+\t\to2net_hb_node_up_manual((int)node_num);\n"
+ "+\t}\n"
+ "+\treturn count;\n"
+ "+}\n"
+ "+\n"
+ "+static struct kobj_attribute ramster_manual_node_up_attr = {\n"
+ "+\t.attr = { .name = \"manual_node_up\", .mode = 0644 },\n"
+ "+\t.show = ramster_manual_node_up_show,\n"
+ "+\t.store = ramster_manual_node_up_store,\n"
+ "+};\n"
+ "+\n"
+ "+static struct attribute *ramster_attrs[] = {\n"
+ "+\t&ramster_pers_remotify_enable_attr.attr,\n"
+ "+\t&ramster_eph_remotify_enable_attr.attr,\n"
+ "+\t&ramster_remote_pers_pages_attr.attr,\n"
+ "+\t&ramster_eph_pages_remoted_attr.attr,\n"
+ "+\t&ramster_eph_pages_remote_failed_attr.attr,\n"
+ "+\t&ramster_pers_pages_remoted_attr.attr,\n"
+ "+\t&ramster_pers_pages_remote_failed_attr.attr,\n"
+ "+\t&ramster_pers_pages_remote_nomem_attr.attr,\n"
+ "+\t&ramster_remote_pages_flushed_attr.attr,\n"
+ "+\t&ramster_remote_page_flushes_failed_attr.attr,\n"
+ "+\t&ramster_remote_objects_flushed_attr.attr,\n"
+ "+\t&ramster_remote_object_flushes_failed_attr.attr,\n"
+ "+\t&ramster_remote_eph_pages_succ_get_attr.attr,\n"
+ "+\t&ramster_remote_eph_pages_unsucc_get_attr.attr,\n"
+ "+\t&ramster_remote_pers_pages_succ_get_attr.attr,\n"
+ "+\t&ramster_remote_pers_pages_unsucc_get_attr.attr,\n"
+ "+\t&ramster_foreign_eph_pampd_count_attr.attr,\n"
+ "+\t&ramster_foreign_eph_pampd_count_max_attr.attr,\n"
+ "+\t&ramster_foreign_pers_pampd_count_attr.attr,\n"
+ "+\t&ramster_foreign_pers_pampd_count_max_attr.attr,\n"
+ "+\t&ramster_curr_flnode_count_attr.attr,\n"
+ "+\t&ramster_curr_flnode_count_max_attr.attr,\n"
+ "+\t&ramster_manual_node_up_attr.attr,\n"
+ "+\tNULL,\n"
+ "+};\n"
+ "+\n"
+ "+static struct attribute_group ramster_attr_group = {\n"
+ "+\t.attrs = ramster_attrs,\n"
+ "+\t.name = \"ramster\",\n"
+ "+};\n"
+ "+\n"
+ " #endif /* CONFIG_SYSFS */\n"
+ " /*\n"
+ "  * When zcache is disabled (\"frozen\"), pools can be created and destroyed,\n"
+ "@@ -1527,8 +2629,9 @@ static struct shrinker zcache_shrinker = {\n"
+ "  * zcache shims between cleancache/frontswap ops and tmem\n"
+ "  */\n"
+ " \n"
+ "-static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,\n"
+ "-\t\t\t\tuint32_t index, struct page *page)\n"
+ "+int zcache_put(int cli_id, int pool_id, struct tmem_oid *oidp,\n"
+ "+\t\t\tuint32_t index, char *data, size_t size,\n"
+ "+\t\t\tbool raw, int ephemeral)\n"
+ " {\n"
+ " \tstruct tmem_pool *pool;\n"
+ " \tint ret = -1;\n"
+ "@@ -1539,8 +2642,7 @@ static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,\n"
+ " \t\tgoto out;\n"
+ " \tif (!zcache_freeze && zcache_do_preload(pool) == 0) {\n"
+ " \t\t/* preload does preempt_disable on success */\n"
+ "-\t\tret = tmem_put(pool, oidp, index, (char *)(page),\n"
+ "-\t\t\t\tPAGE_SIZE, 0, is_ephemeral(pool));\n"
+ "+\t\tret = tmem_put(pool, oidp, index, data, size, raw, ephemeral);\n"
+ " \t\tif (ret < 0) {\n"
+ " \t\t\tif (is_ephemeral(pool))\n"
+ " \t\t\t\tzcache_failed_eph_puts++;\n"
+ "@@ -1560,27 +2662,40 @@ out:\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "-static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,\n"
+ "-\t\t\t\tuint32_t index, struct page *page)\n"
+ "+int zcache_get(int cli_id, int pool_id, struct tmem_oid *oidp,\n"
+ "+\t\t\tuint32_t index, char *data, size_t *sizep,\n"
+ "+\t\t\tbool raw, int get_and_free)\n"
+ " {\n"
+ " \tstruct tmem_pool *pool;\n"
+ " \tint ret = -1;\n"
+ "-\tunsigned long flags;\n"
+ "-\tsize_t size = PAGE_SIZE;\n"
+ "+\tbool eph;\n"
+ " \n"
+ "-\tlocal_irq_save(flags);\n"
+ "+\tif (!raw) {\n"
+ "+\t\tBUG_ON(irqs_disabled());\n"
+ "+\t\tBUG_ON(in_softirq());\n"
+ "+\t}\n"
+ " \tpool = zcache_get_pool_by_id(cli_id, pool_id);\n"
+ "+\teph = is_ephemeral(pool);\n"
+ " \tif (likely(pool != NULL)) {\n"
+ " \t\tif (atomic_read(&pool->obj_count) > 0)\n"
+ "-\t\t\tret = tmem_get(pool, oidp, index, (char *)(page),\n"
+ "-\t\t\t\t\t&size, 0, is_ephemeral(pool));\n"
+ "+\t\t\tret = tmem_get(pool, oidp, index, data, sizep,\n"
+ "+\t\t\t\t\traw, get_and_free);\n"
+ " \t\tzcache_put_pool(pool);\n"
+ " \t}\n"
+ "-\tlocal_irq_restore(flags);\n"
+ "+\tWARN_ONCE((!eph && (ret != 0)), \"zcache_get fails on persistent pool, \"\n"
+ "+\t\t\t  \"bad things are very likely to happen soon\\n\");\n"
+ "+#if 1\n"
+ "+{\n"
+ "+if (ret != 0 && ret != -1 && !(ret == -EINVAL && is_ephemeral(pool)))\n"
+ "+pr_err(\"TESTING zcache_get tmem_get returns ret=%d\\n\", ret);\n"
+ "+}\n"
+ "+#endif\n"
+ "+\tif (ret == -EAGAIN)\n"
+ "+\t\tBUG(); /* FIXME... don't need this anymore??? let's ensure */\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "-static int zcache_flush_page(int cli_id, int pool_id,\n"
+ "+int zcache_flush(int cli_id, int pool_id,\n"
+ " \t\t\t\tstruct tmem_oid *oidp, uint32_t index)\n"
+ " {\n"
+ " \tstruct tmem_pool *pool;\n"
+ "@@ -1590,6 +2705,7 @@ static int zcache_flush_page(int cli_id, int pool_id,\n"
+ " \tlocal_irq_save(flags);\n"
+ " \tzcache_flush_total++;\n"
+ " \tpool = zcache_get_pool_by_id(cli_id, pool_id);\n"
+ "+\tramster_do_preload_flnode_only(pool);\n"
+ " \tif (likely(pool != NULL)) {\n"
+ " \t\tif (atomic_read(&pool->obj_count) > 0)\n"
+ " \t\t\tret = tmem_flush_page(pool, oidp, index);\n"
+ "@@ -1601,8 +2717,7 @@ static int zcache_flush_page(int cli_id, int pool_id,\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "-static int zcache_flush_object(int cli_id, int pool_id,\n"
+ "-\t\t\t\tstruct tmem_oid *oidp)\n"
+ "+int zcache_flush_object(int cli_id, int pool_id, struct tmem_oid *oidp)\n"
+ " {\n"
+ " \tstruct tmem_pool *pool;\n"
+ " \tint ret = -1;\n"
+ "@@ -1611,6 +2726,7 @@ static int zcache_flush_object(int cli_id, int pool_id,\n"
+ " \tlocal_irq_save(flags);\n"
+ " \tzcache_flobj_total++;\n"
+ " \tpool = zcache_get_pool_by_id(cli_id, pool_id);\n"
+ "+\tramster_do_preload_flnode_only(pool);\n"
+ " \tif (likely(pool != NULL)) {\n"
+ " \t\tif (atomic_read(&pool->obj_count) > 0)\n"
+ " \t\t\tret = tmem_flush_object(pool, oidp);\n"
+ "@@ -1622,7 +2738,7 @@ static int zcache_flush_object(int cli_id, int pool_id,\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "-static int zcache_destroy_pool(int cli_id, int pool_id)\n"
+ "+int zcache_client_destroy_pool(int cli_id, int pool_id)\n"
+ " {\n"
+ " \tstruct tmem_pool *pool = NULL;\n"
+ " \tstruct zcache_client *cli = NULL;\n"
+ "@@ -1649,13 +2765,17 @@ static int zcache_destroy_pool(int cli_id, int pool_id)\n"
+ " \tret = tmem_destroy_pool(pool);\n"
+ " \tlocal_bh_enable();\n"
+ " \tkfree(pool);\n"
+ "-\tpr_info(\"zcache: destroyed pool id=%d, cli_id=%d\\n\",\n"
+ "-\t\t\tpool_id, cli_id);\n"
+ "+\tpr_info(\"ramster: destroyed pool id=%d cli_id=%d\\n\", pool_id, cli_id);\n"
+ " out:\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "-static int zcache_new_pool(uint16_t cli_id, uint32_t flags)\n"
+ "+static int zcache_destroy_pool(int pool_id)\n"
+ "+{\n"
+ "+\treturn zcache_client_destroy_pool(LOCAL_CLIENT, pool_id);\n"
+ "+}\n"
+ "+\n"
+ "+int zcache_new_pool(uint16_t cli_id, uint32_t flags)\n"
+ " {\n"
+ " \tint poolid = -1;\n"
+ " \tstruct tmem_pool *pool;\n"
+ "@@ -1670,7 +2790,7 @@ static int zcache_new_pool(uint16_t cli_id, uint32_t flags)\n"
+ " \tatomic_inc(&cli->refcount);\n"
+ " \tpool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);\n"
+ " \tif (pool == NULL) {\n"
+ "-\t\tpr_info(\"zcache: pool creation failed: out of memory\\n\");\n"
+ "+\t\tpr_info(\"ramster: pool creation failed: out of memory\\n\");\n"
+ " \t\tgoto out;\n"
+ " \t}\n"
+ " \n"
+ "@@ -1678,7 +2798,7 @@ static int zcache_new_pool(uint16_t cli_id, uint32_t flags)\n"
+ " \t\tif (cli->tmem_pools[poolid] == NULL)\n"
+ " \t\t\tbreak;\n"
+ " \tif (poolid >= MAX_POOLS_PER_CLIENT) {\n"
+ "-\t\tpr_info(\"zcache: pool creation failed: max exceeded\\n\");\n"
+ "+\t\tpr_info(\"ramster: pool creation failed: max exceeded\\n\");\n"
+ " \t\tkfree(pool);\n"
+ " \t\tpoolid = -1;\n"
+ " \t\tgoto out;\n"
+ "@@ -1688,7 +2808,7 @@ static int zcache_new_pool(uint16_t cli_id, uint32_t flags)\n"
+ " \tpool->pool_id = poolid;\n"
+ " \ttmem_new_pool(pool, flags);\n"
+ " \tcli->tmem_pools[poolid] = pool;\n"
+ "-\tpr_info(\"zcache: created %s tmem pool, id=%d, client=%d\\n\",\n"
+ "+\tpr_info(\"ramster: created %s tmem pool, id=%d, client=%d\\n\",\n"
+ " \t\tflags & TMEM_POOL_PERSIST ? \"persistent\" : \"ephemeral\",\n"
+ " \t\tpoolid, cli_id);\n"
+ " out:\n"
+ "@@ -1697,6 +2817,64 @@ out:\n"
+ " \treturn poolid;\n"
+ " }\n"
+ " \n"
+ "+static int zcache_local_new_pool(uint32_t flags)\n"
+ "+{\n"
+ "+\treturn zcache_new_pool(LOCAL_CLIENT, flags);\n"
+ "+}\n"
+ "+\n"
+ "+int zcache_autocreate_pool(int cli_id, int pool_id, bool ephemeral)\n"
+ "+{\n"
+ "+\tstruct tmem_pool *pool;\n"
+ "+\tstruct zcache_client *cli = NULL;\n"
+ "+\tuint32_t flags = ephemeral ? 0 : TMEM_POOL_PERSIST;\n"
+ "+\tint ret = -1;\n"
+ "+\n"
+ "+\tif (cli_id == LOCAL_CLIENT)\n"
+ "+\t\tgoto out;\n"
+ "+\tif (pool_id >= MAX_POOLS_PER_CLIENT)\n"
+ "+\t\tgoto out;\n"
+ "+\telse if ((unsigned int)cli_id < MAX_CLIENTS)\n"
+ "+\t\tcli = &zcache_clients[cli_id];\n"
+ "+\tif ((ephemeral && !use_cleancache) || (!ephemeral && !use_frontswap))\n"
+ "+\t\tBUG(); /* FIXME, handle more gracefully later */\n"
+ "+\tif (!cli->allocated) {\n"
+ "+\t\tif (zcache_new_client(cli_id))\n"
+ "+\t\t\tBUG(); /* FIXME, handle more gracefully later */\n"
+ "+\t\tcli = &zcache_clients[cli_id];\n"
+ "+\t}\n"
+ "+\tatomic_inc(&cli->refcount);\n"
+ "+\tpool = cli->tmem_pools[pool_id];\n"
+ "+\tif (pool != NULL) {\n"
+ "+\t\tif (pool->persistent && ephemeral) {\n"
+ "+\t\t\tpr_err(\"zcache_autocreate_pool: type mismatch\\n\");\n"
+ "+\t\t\tgoto out;\n"
+ "+\t\t}\n"
+ "+\t\tret = 0;\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+\tpool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);\n"
+ "+\tif (pool == NULL) {\n"
+ "+\t\tpr_info(\"ramster: pool creation failed: out of memory\\n\");\n"
+ "+\t\tgoto out;\n"
+ "+\t}\n"
+ "+\tatomic_set(&pool->refcount, 0);\n"
+ "+\tpool->client = cli;\n"
+ "+\tpool->pool_id = pool_id;\n"
+ "+\ttmem_new_pool(pool, flags);\n"
+ "+\tcli->tmem_pools[pool_id] = pool;\n"
+ "+\tpr_info(\"ramster: AUTOcreated %s tmem poolid=%d, for remote client=%d\\n\",\n"
+ "+\t\tflags & TMEM_POOL_PERSIST ? \"persistent\" : \"ephemeral\",\n"
+ "+\t\tpool_id, cli_id);\n"
+ "+\tret = 0;\n"
+ "+out:\n"
+ "+\tif (cli == NULL)\n"
+ "+\t\tBUG(); /* FIXME, handle more gracefully later */\n"
+ "+\t\t/* pr_err(\"zcache_autocreate_pool: failed\\n\"); */\n"
+ "+\tif (cli != NULL)\n"
+ "+\t\tatomic_dec(&cli->refcount);\n"
+ "+\treturn ret;\n"
+ "+}\n"
+ "+\n"
+ " /**********\n"
+ "  * Two kernel functionalities currently can be layered on top of tmem.\n"
+ "  * These are \"cleancache\" which is used as a second-chance cache for clean\n"
+ "@@ -1713,8 +2891,12 @@ static void zcache_cleancache_put_page(int pool_id,\n"
+ " \tu32 ind = (u32) index;\n"
+ " \tstruct tmem_oid oid = *(struct tmem_oid *)&key;\n"
+ " \n"
+ "-\tif (likely(ind == index))\n"
+ "-\t\t(void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, page);\n"
+ "+\tif (likely(ind == index)) {\n"
+ "+\t\tchar *kva = page_address(page);\n"
+ "+\n"
+ "+\t\t(void)zcache_put(LOCAL_CLIENT, pool_id, &oid, index,\n"
+ "+\t\t\tkva, PAGE_SIZE, 0, 1);\n"
+ "+\t}\n"
+ " }\n"
+ " \n"
+ " static int zcache_cleancache_get_page(int pool_id,\n"
+ "@@ -1725,8 +2907,15 @@ static int zcache_cleancache_get_page(int pool_id,\n"
+ " \tstruct tmem_oid oid = *(struct tmem_oid *)&key;\n"
+ " \tint ret = -1;\n"
+ " \n"
+ "-\tif (likely(ind == index))\n"
+ "-\t\tret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, page);\n"
+ "+\tpreempt_disable();\n"
+ "+\tif (likely(ind == index)) {\n"
+ "+\t\tchar *kva = page_address(page);\n"
+ "+\t\tsize_t size = PAGE_SIZE;\n"
+ "+\n"
+ "+\t\tret = zcache_get(LOCAL_CLIENT, pool_id, &oid, index,\n"
+ "+\t\t\tkva, &size, 0, 0);\n"
+ "+\t}\n"
+ "+\tpreempt_enable();\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "@@ -1738,7 +2927,7 @@ static void zcache_cleancache_flush_page(int pool_id,\n"
+ " \tstruct tmem_oid oid = *(struct tmem_oid *)&key;\n"
+ " \n"
+ " \tif (likely(ind == index))\n"
+ "-\t\t(void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);\n"
+ "+\t\t(void)zcache_flush(LOCAL_CLIENT, pool_id, &oid, ind);\n"
+ " }\n"
+ " \n"
+ " static void zcache_cleancache_flush_inode(int pool_id,\n"
+ "@@ -1752,7 +2941,7 @@ static void zcache_cleancache_flush_inode(int pool_id,\n"
+ " static void zcache_cleancache_flush_fs(int pool_id)\n"
+ " {\n"
+ " \tif (pool_id >= 0)\n"
+ "-\t\t(void)zcache_destroy_pool(LOCAL_CLIENT, pool_id);\n"
+ "+\t\t(void)zcache_destroy_pool(pool_id);\n"
+ " }\n"
+ " \n"
+ " static int zcache_cleancache_init_fs(size_t pagesize)\n"
+ "@@ -1760,7 +2949,7 @@ static int zcache_cleancache_init_fs(size_t pagesize)\n"
+ " \tBUG_ON(sizeof(struct cleancache_filekey) !=\n"
+ " \t\t\t\tsizeof(struct tmem_oid));\n"
+ " \tBUG_ON(pagesize != PAGE_SIZE);\n"
+ "-\treturn zcache_new_pool(LOCAL_CLIENT, 0);\n"
+ "+\treturn zcache_local_new_pool(0);\n"
+ " }\n"
+ " \n"
+ " static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)\n"
+ "@@ -1769,7 +2958,7 @@ static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)\n"
+ " \tBUG_ON(sizeof(struct cleancache_filekey) !=\n"
+ " \t\t\t\tsizeof(struct tmem_oid));\n"
+ " \tBUG_ON(pagesize != PAGE_SIZE);\n"
+ "-\treturn zcache_new_pool(LOCAL_CLIENT, 0);\n"
+ "+\treturn zcache_local_new_pool(0);\n"
+ " }\n"
+ " \n"
+ " static struct cleancache_ops zcache_cleancache_ops = {\n"
+ "@@ -1799,7 +2988,7 @@ static int zcache_frontswap_poolid = -1;\n"
+ "  * Swizzling increases objects per swaptype, increasing tmem concurrency\n"
+ "  * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS\n"
+ "  */\n"
+ "-#define SWIZ_BITS\t\t4\n"
+ "+#define SWIZ_BITS\t\t8\n"
+ " #define SWIZ_MASK\t\t((1 << SWIZ_BITS) - 1)\n"
+ " #define _oswiz(_type, _ind)\t((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))\n"
+ " #define iswiz(_ind)\t\t(_ind >> SWIZ_BITS)\n"
+ "@@ -1819,12 +3008,14 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,\n"
+ " \tstruct tmem_oid oid = oswiz(type, ind);\n"
+ " \tint ret = -1;\n"
+ " \tunsigned long flags;\n"
+ "+\tchar *kva;\n"
+ " \n"
+ " \tBUG_ON(!PageLocked(page));\n"
+ " \tif (likely(ind64 == ind)) {\n"
+ " \t\tlocal_irq_save(flags);\n"
+ "-\t\tret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,\n"
+ "-\t\t\t\t\t&oid, iswiz(ind), page);\n"
+ "+\t\tkva = page_address(page);\n"
+ "+\t\tret = zcache_put(LOCAL_CLIENT, zcache_frontswap_poolid,\n"
+ "+\t\t\t\t&oid, iswiz(ind), kva, PAGE_SIZE, 0, 0);\n"
+ " \t\tlocal_irq_restore(flags);\n"
+ " \t}\n"
+ " \treturn ret;\n"
+ "@@ -1840,10 +3031,16 @@ static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,\n"
+ " \tstruct tmem_oid oid = oswiz(type, ind);\n"
+ " \tint ret = -1;\n"
+ " \n"
+ "+\tpreempt_disable(); /* FIXME, remove this? */\n"
+ " \tBUG_ON(!PageLocked(page));\n"
+ "-\tif (likely(ind64 == ind))\n"
+ "-\t\tret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,\n"
+ "-\t\t\t\t\t&oid, iswiz(ind), page);\n"
+ "+\tif (likely(ind64 == ind)) {\n"
+ "+\t\tchar *kva = page_address(page);\n"
+ "+\t\tsize_t size = PAGE_SIZE;\n"
+ "+\n"
+ "+\t\tret = zcache_get(LOCAL_CLIENT, zcache_frontswap_poolid,\n"
+ "+\t\t\t\t\t&oid, iswiz(ind), kva, &size, 0, -1);\n"
+ "+\t}\n"
+ "+\tpreempt_enable(); /* FIXME, remove this? */\n"
+ " \treturn ret;\n"
+ " }\n"
+ " \n"
+ "@@ -1855,7 +3052,7 @@ static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)\n"
+ " \tstruct tmem_oid oid = oswiz(type, ind);\n"
+ " \n"
+ " \tif (likely(ind64 == ind))\n"
+ "-\t\t(void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,\n"
+ "+\t\t(void)zcache_flush(LOCAL_CLIENT, zcache_frontswap_poolid,\n"
+ " \t\t\t\t\t&oid, iswiz(ind));\n"
+ " }\n"
+ " \n"
+ "@@ -1877,7 +3074,7 @@ static void zcache_frontswap_init(unsigned ignored)\n"
+ " \t/* a single tmem poolid is used for all frontswap \"types\" (swapfiles) */\n"
+ " \tif (zcache_frontswap_poolid < 0)\n"
+ " \t\tzcache_frontswap_poolid =\n"
+ "-\t\t\tzcache_new_pool(LOCAL_CLIENT, TMEM_POOL_PERSIST);\n"
+ "+\t\t\t\tzcache_local_new_pool(TMEM_POOL_PERSIST);\n"
+ " }\n"
+ " \n"
+ " static struct frontswap_ops zcache_frontswap_ops = {\n"
+ "@@ -1898,19 +3095,125 @@ struct frontswap_ops zcache_frontswap_register_ops(void)\n"
+ " #endif\n"
+ " \n"
+ " /*\n"
+ "+ * frontswap selfshrinking\n"
+ "+ */\n"
+ "+\n"
+ "+#ifdef CONFIG_FRONTSWAP\n"
+ "+/* In HZ, controls frequency of worker invocation. */\n"
+ "+static unsigned int selfshrink_interval __read_mostly = 5;\n"
+ "+\n"
+ "+static void selfshrink_process(struct work_struct *work);\n"
+ "+static DECLARE_DELAYED_WORK(selfshrink_worker, selfshrink_process);\n"
+ "+\n"
+ "+/* Enable/disable with sysfs. */\n"
+ "+static bool frontswap_selfshrinking __read_mostly;\n"
+ "+\n"
+ "+/* Enable/disable with kernel boot option. */\n"
+ "+static bool use_frontswap_selfshrink __initdata = true;\n"
+ "+\n"
+ "+/*\n"
+ "+ * The default values for the following parameters were deemed reasonable\n"
+ "+ * by experimentation, may be workload-dependent, and can all be\n"
+ "+ * adjusted via sysfs.\n"
+ "+ */\n"
+ "+\n"
+ "+/* Control rate for frontswap shrinking. Higher hysteresis is slower. */\n"
+ "+static unsigned int frontswap_hysteresis __read_mostly = 20;\n"
+ "+\n"
+ "+/*\n"
+ "+ * Number of selfshrink worker invocations to wait before observing that\n"
+ "+ * frontswap selfshrinking should commence. Note that selfshrinking does\n"
+ "+ * not use a separate worker thread.\n"
+ "+ */\n"
+ "+static unsigned int frontswap_inertia __read_mostly = 3;\n"
+ "+\n"
+ "+/* Countdown to next invocation of frontswap_shrink() */\n"
+ "+static unsigned long frontswap_inertia_counter;\n"
+ "+\n"
+ "+/*\n"
+ "+ * Invoked by the selfshrink worker thread, uses current number of pages\n"
+ "+ * in frontswap (frontswap_curr_pages()), previous status, and control\n"
+ "+ * values (hysteresis and inertia) to determine if frontswap should be\n"
+ "+ * shrunk and what the new frontswap size should be.  Note that\n"
+ "+ * frontswap_shrink is essentially a partial swapoff that immediately\n"
+ "+ * transfers pages from the \"swap device\" (frontswap) back into kernel\n"
+ "+ * RAM; despite the name, frontswap \"shrinking\" is very different from\n"
+ "+ * the \"shrinker\" interface used by the kernel MM subsystem to reclaim\n"
+ "+ * memory.\n"
+ "+ */\n"
+ "+static void frontswap_selfshrink(void)\n"
+ "+{\n"
+ "+\tstatic unsigned long cur_frontswap_pages;\n"
+ "+\tstatic unsigned long last_frontswap_pages;\n"
+ "+\tstatic unsigned long tgt_frontswap_pages;\n"
+ "+\n"
+ "+\tlast_frontswap_pages = cur_frontswap_pages;\n"
+ "+\tcur_frontswap_pages = frontswap_curr_pages();\n"
+ "+\tif (!cur_frontswap_pages ||\n"
+ "+\t\t\t(cur_frontswap_pages > last_frontswap_pages)) {\n"
+ "+\t\tfrontswap_inertia_counter = frontswap_inertia;\n"
+ "+\t\treturn;\n"
+ "+\t}\n"
+ "+\tif (frontswap_inertia_counter && --frontswap_inertia_counter)\n"
+ "+\t\treturn;\n"
+ "+\tif (cur_frontswap_pages <= frontswap_hysteresis)\n"
+ "+\t\ttgt_frontswap_pages = 0;\n"
+ "+\telse\n"
+ "+\t\ttgt_frontswap_pages = cur_frontswap_pages -\n"
+ "+\t\t\t(cur_frontswap_pages / frontswap_hysteresis);\n"
+ "+\tfrontswap_shrink(tgt_frontswap_pages);\n"
+ "+}\n"
+ "+\n"
+ "+static int __init ramster_nofrontswap_selfshrink_setup(char *s)\n"
+ "+{\n"
+ "+\tuse_frontswap_selfshrink = false;\n"
+ "+\treturn 1;\n"
+ "+}\n"
+ "+\n"
+ "+__setup(\"noselfshrink\", ramster_nofrontswap_selfshrink_setup);\n"
+ "+\n"
+ "+static void selfshrink_process(struct work_struct *work)\n"
+ "+{\n"
+ "+\tif (frontswap_selfshrinking && frontswap_enabled) {\n"
+ "+\t\tfrontswap_selfshrink();\n"
+ "+\t\tschedule_delayed_work(&selfshrink_worker,\n"
+ "+\t\t\tselfshrink_interval * HZ);\n"
+ "+\t}\n"
+ "+}\n"
+ "+\n"
+ "+static int ramster_enabled;\n"
+ "+\n"
+ "+static int __init ramster_selfshrink_init(void)\n"
+ "+{\n"
+ "+\tfrontswap_selfshrinking = ramster_enabled && use_frontswap_selfshrink;\n"
+ "+\tif (frontswap_selfshrinking)\n"
+ "+\t\tpr_info(\"ramster: Initializing frontswap \"\n"
+ "+\t\t\t\t\t\"selfshrinking driver.\\n\");\n"
+ "+\telse\n"
+ "+\t\treturn -ENODEV;\n"
+ "+\n"
+ "+\tschedule_delayed_work(&selfshrink_worker, selfshrink_interval * HZ);\n"
+ "+\n"
+ "+\treturn 0;\n"
+ "+}\n"
+ "+\n"
+ "+subsys_initcall(ramster_selfshrink_init);\n"
+ "+#endif\n"
+ "+\n"
+ "+/*\n"
+ "  * zcache initialization\n"
+ "- * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR\n"
+ "+ * NOTE FOR NOW ramster MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR\n"
+ "  * NOTHING HAPPENS!\n"
+ "  */\n"
+ " \n"
+ "-static int zcache_enabled;\n"
+ "+static int ramster_enabled;\n"
+ " \n"
+ "-static int __init enable_zcache(char *s)\n"
+ "+static int __init enable_ramster(char *s)\n"
+ " {\n"
+ "-\tzcache_enabled = 1;\n"
+ "+\tramster_enabled = 1;\n"
+ " \treturn 1;\n"
+ " }\n"
+ "-__setup(\"zcache\", enable_zcache);\n"
+ "+__setup(\"ramster\", enable_ramster);\n"
+ " \n"
+ " /* allow independent dynamic disabling of cleancache and frontswap */\n"
+ " \n"
+ "@@ -1918,16 +3221,22 @@ static int use_cleancache = 1;\n"
+ " \n"
+ " static int __init no_cleancache(char *s)\n"
+ " {\n"
+ "+\tpr_info(\"INIT no_cleancache called\\n\");\n"
+ " \tuse_cleancache = 0;\n"
+ " \treturn 1;\n"
+ " }\n"
+ " \n"
+ "-__setup(\"nocleancache\", no_cleancache);\n"
+ "+/*\n"
+ "+ * FIXME: need to guarantee this gets checked before zcache_init is called\n"
+ "+ * What is the correct way to achieve this?\n"
+ "+ */\n"
+ "+early_param(\"nocleancache\", no_cleancache);\n"
+ " \n"
+ " static int use_frontswap = 1;\n"
+ " \n"
+ " static int __init no_frontswap(char *s)\n"
+ " {\n"
+ "+\tpr_info(\"INIT no_frontswap called\\n\");\n"
+ " \tuse_frontswap = 0;\n"
+ " \treturn 1;\n"
+ " }\n"
+ "@@ -1940,20 +3249,22 @@ static int __init zcache_init(void)\n"
+ " \n"
+ " #ifdef CONFIG_SYSFS\n"
+ " \tret = sysfs_create_group(mm_kobj, &zcache_attr_group);\n"
+ "+\tret = sysfs_create_group(mm_kobj, &ramster_attr_group);\n"
+ " \tif (ret) {\n"
+ "-\t\tpr_err(\"zcache: can't create sysfs\\n\");\n"
+ "+\t\tpr_err(\"ramster: can't create sysfs\\n\");\n"
+ " \t\tgoto out;\n"
+ " \t}\n"
+ " #endif /* CONFIG_SYSFS */\n"
+ " #if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP)\n"
+ "-\tif (zcache_enabled) {\n"
+ "+\tif (ramster_enabled) {\n"
+ " \t\tunsigned int cpu;\n"
+ " \n"
+ "+\t\t(void)ramster_o2net_register_handlers();\n"
+ " \t\ttmem_register_hostops(&zcache_hostops);\n"
+ " \t\ttmem_register_pamops(&zcache_pamops);\n"
+ " \t\tret = register_cpu_notifier(&zcache_cpu_notifier_block);\n"
+ " \t\tif (ret) {\n"
+ "-\t\t\tpr_err(\"zcache: can't register cpu notifier\\n\");\n"
+ "+\t\t\tpr_err(\"ramster: can't register cpu notifier\\n\");\n"
+ " \t\t\tgoto out;\n"
+ " \t\t}\n"
+ " \t\tfor_each_online_cpu(cpu) {\n"
+ "@@ -1966,35 +3277,39 @@ static int __init zcache_init(void)\n"
+ " \t\t\t\tsizeof(struct tmem_objnode), 0, 0, NULL);\n"
+ " \tzcache_obj_cache = kmem_cache_create(\"zcache_obj\",\n"
+ " \t\t\t\tsizeof(struct tmem_obj), 0, 0, NULL);\n"
+ "-\tret = zcache_new_client(LOCAL_CLIENT);\n"
+ "-\tif (ret) {\n"
+ "-\t\tpr_err(\"zcache: can't create client\\n\");\n"
+ "-\t\tgoto out;\n"
+ "-\t}\n"
+ "+\tramster_flnode_cache = kmem_cache_create(\"ramster_flnode\",\n"
+ "+\t\t\t\tsizeof(struct flushlist_node), 0, 0, NULL);\n"
+ " #endif\n"
+ " #ifdef CONFIG_CLEANCACHE\n"
+ "-\tif (zcache_enabled && use_cleancache) {\n"
+ "+\tpr_info(\"INIT ramster_enabled=%d use_cleancache=%d\\n\",\n"
+ "+\t\t\t\t\tramster_enabled, use_cleancache);\n"
+ "+\tif (ramster_enabled && use_cleancache) {\n"
+ " \t\tstruct cleancache_ops old_ops;\n"
+ " \n"
+ " \t\tzbud_init();\n"
+ " \t\tregister_shrinker(&zcache_shrinker);\n"
+ " \t\told_ops = zcache_cleancache_register_ops();\n"
+ "-\t\tpr_info(\"zcache: cleancache enabled using kernel \"\n"
+ "+\t\tpr_info(\"ramster: cleancache enabled using kernel \"\n"
+ " \t\t\t\"transcendent memory and compression buddies\\n\");\n"
+ " \t\tif (old_ops.init_fs != NULL)\n"
+ "-\t\t\tpr_warning(\"zcache: cleancache_ops overridden\");\n"
+ "+\t\t\tpr_warning(\"ramster: cleancache_ops overridden\");\n"
+ " \t}\n"
+ " #endif\n"
+ " #ifdef CONFIG_FRONTSWAP\n"
+ "-\tif (zcache_enabled && use_frontswap) {\n"
+ "+\tpr_info(\"INIT ramster_enabled=%d use_frontswap=%d\\n\",\n"
+ "+\t\t\t\t\tramster_enabled, use_frontswap);\n"
+ "+\tif (ramster_enabled && use_frontswap) {\n"
+ " \t\tstruct frontswap_ops old_ops;\n"
+ " \n"
+ "+\t\tzcache_new_client(LOCAL_CLIENT);\n"
+ " \t\told_ops = zcache_frontswap_register_ops();\n"
+ "-\t\tpr_info(\"zcache: frontswap enabled using kernel \"\n"
+ "+\t\tpr_info(\"ramster: frontswap enabled using kernel \"\n"
+ " \t\t\t\"transcendent memory and xvmalloc\\n\");\n"
+ " \t\tif (old_ops.init != NULL)\n"
+ " \t\t\tpr_warning(\"ktmem: frontswap_ops overridden\");\n"
+ " \t}\n"
+ "+\tif (ramster_enabled && (use_frontswap || use_cleancache))\n"
+ "+\t\tramster_remotify_init();\n"
+ " #endif\n"
+ " out:\n"
+ " \treturn ret;\n"
+ "-- \n"
+ 1.7.1
 
-90870adf1d69b7b5967a97261c86ddc9cb6e0c11facb5f4fcd63533ec035ecc8
+5d403d027cb9861f53b2e7192022d04eac01505a7df43e2e8b1ac8ebec574da4

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.