From mboxrd@z Thu Jan 1 00:00:00 1970 From: jbrassow@sourceware.org Date: 21 Jun 2006 01:41:46 -0000 Subject: [Cluster-devel] cluster cmirror-kernel/src/dm-clog-tfr.c cmirr ... Message-ID: <20060621014146.21156.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: jbrassow at sourceware.org 2006-06-21 01:41:43 Added files: cmirror-kernel/src: dm-clog-tfr.c dm-clog-tfr.h dm-clog.c Removed files: cmirror : README TODO cmirror/src : Makefile dm-cmirror-client.c dm-cmirror-cman.c dm-cmirror-cman.h dm-cmirror-common.h dm-cmirror-server.c dm-cmirror-server.h dm-cmirror-xfr.c dm-cmirror-xfr.h dm-log.h Log message: - This is the beginning of the cluster mirror log rewrite. The purpose is to work with the new CMAN/OpenAIS framework. The server moves to user-space. Will post description and RFC to cluster-devel. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/README.diff?cvsroot=cluster&r1=1.3&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/TODO.diff?cvsroot=cluster&r1=1.6&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/Makefile.diff?cvsroot=cluster&r1=1.2&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-client.c.diff?cvsroot=cluster&r1=1.11&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-cman.c.diff?cvsroot=cluster&r1=1.2&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-cman.h.diff?cvsroot=cluster&r1=1.1&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-common.h.diff?cvsroot=cluster&r1=1.6&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-server.c.diff?cvsroot=cluster&r1=1.8&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-server.h.diff?cvsroot=cluster&r1=1.2&r2=NONE 
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-xfr.c.diff?cvsroot=cluster&r1=1.2&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-xfr.h.diff?cvsroot=cluster&r1=1.2&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-log.h.diff?cvsroot=cluster&r1=1.3&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-clog-tfr.c.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-clog-tfr.h.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-clog.c.diff?cvsroot=cluster&r1=NONE&r2=1.1 /cvs/cluster/cluster/cmirror-kernel/src/dm-clog-tfr.c,v --> standard output revision 1.1 --- cluster/cmirror-kernel/src/dm-clog-tfr.c +++ - 2006-06-21 01:41:46.303327000 +0000 @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. + * + * This file is released under the LGPL. + */ + +#include "dm-clog-tfr.h" + +/* + * Pre-allocated nominal request area for speed + */ +#define DM_CLOG_NOMINAL_REQUEST_SIZE 512 +static char nominal_request[DM_CLOG_NOMINAL_REQUEST_SIZE]; + +static DECLARE_MUTEX(consult_server_lock); + +/* + * dm_clog_consult_server + * @uuid: log's uuid (must be MAX_NAME_LEN in size) + * @request_type: + * @data: data to tx to the server + * @data_size: size of data in bytes + * @rdata: place to put return data from server + * @rdata_size: value-result (amount of space given/amount of space used) + * + * Only one process at a time can communicate with the server. 
+ * Possible error return values: + * +XXX: Server-side error + * -XXX: Client-side error + * -ENOSPC: Not enough space in rdata + * -ENOMEM: Unable to allocate memory to complete request + * -ESRCH: Unable to contact server + * EIO: Server unable to commit request + * + * Returns: 0 on success, otherwise failure + */ +int dm_clog_consult_server(const char *uuid, int request_type, + char *data, int data_size, + char *rdata, int *rdata_size) +{ + int r = 0; + struct clog_tfr *tfr = (struct clog_tfr *)nominal_request; + + mutex_lock(&consult_server_lock); + if (data_size > (DM_CLOG_NOMINAL_REQUEST_SIZE - sizeof(*tfr))) + /* FIXME: is kmalloc sufficient if we need this much space? */ + tfr = kmalloc(data_size + sizeof(*tfr), GFP_KERNEL); + + if (!tfr) + return -ENOMEM; + + memcpy(tfr->uuid, uuid, MAX_NAME_LEN); + tfr->request_type = request_type; + tfr->data_size = data_size; + + /* + * FIXME: Send to server + */ + + if (rdata) { + /* FIXME: receive from server */ + if (tfr->error) { + r = tfr->error; + } else if (tfr->data_size > *rdata_size) { + r = -ENOSPC; + } else { + *rdata_size = tfr->data_size; + memcpy(rdata, tft->data, tfr->data_size); + } + /* FIXME: If using netlink, we may wish to ack back */ + } else { + /* + * FIXME: If we are using netlink, we may want an + * ack from the server to know that it got the + * request. (Ack is implicit if we are receiving + * data.) + */ + } + r = ENOSYS; + + mutex_unlock(&consult_server_lock); + return r; +} /cvs/cluster/cluster/cmirror-kernel/src/dm-clog-tfr.h,v --> standard output revision 1.1 --- cluster/cmirror-kernel/src/dm-clog-tfr.h +++ - 2006-06-21 01:41:46.388855000 +0000 @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. + * + * This file is released under the LGPL. 
+ */ + +#ifndef __DM_CLOG_TFR_H__ + +#define DM_CLOG_CTR 1 +#define DM_CLOG_DTR 2 +#define DM_CLOG_PRESUSPEND 3 +#define DM_CLOG_POSTSUSPEND 4 +#define DM_CLOG_RESUME 5 +#define DM_CLOG_GET_REGION_SIZE 6 +#define DM_CLOG_IS_CLEAN 7 +#define DM_CLOG_IS_REMOTE_RECOVERING 8 +#define DM_CLOG_IN_SYNC 9 +#define DM_CLOG_FLUSH 10 +#define DM_CLOG_MARK_REGION 11 +#define DM_CLOG_CLEAR_REGION 12 +#define DM_CLOG_GET_RESYNC_WORK 13 +#define DM_CLOG_SET_REGION_SYNC 14 +#define DM_CLOG_GET_SYNC_COUNT 15 +#define DM_CLOG_STATUS 16 +#define DM_CLOG_GET_FAILURE_RESPONSE 17 + +struct clog_tfr { + char uuid[MAX_NAME_LEN]; + int error; /* Used by server to inform of errors */ + int request_type; + int data_size; + char data[0]; +}; + + +int dm_clog_consult_server(const char *uuid, int request_type, + char *data, int data_size, + char *rdata, int *rdata_size); + +#endif /* __DM_CLOG_TFR_H__ */ /cvs/cluster/cluster/cmirror-kernel/src/dm-clog.c,v --> standard output revision 1.1 --- cluster/cmirror-kernel/src/dm-clog.c +++ - 2006-06-21 01:41:46.503450000 +0000 @@ -0,0 +1,436 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. + * + * This file is released under the LGPL. 
+ */
+
+#include "dm-clog-tfr.h"
+
+/* A queued mark/clear request, held on log_c.flush_list until the
+ * next flush pushes it to the server. */
+struct flush_entry {
+	int type;
+	region_t region;
+	struct list_head list;
+};
+
+/* Per-log context for the clustered dirty log. */
+struct log_c {
+	struct dm_target *ti;
+	uint32_t region_size;
+	region_t region_count;
+	int failure_response;
+	char uuid[MAX_NAME_LEN];
+
+	spinlock_t flush_lock;
+	struct list_head flush_list;  /* only for clear and mark requests */
+};
+
+static mempool_t *flush_entry_pool = NULL;
+
+static void *flush_entry_alloc(int gfp_mask, void *pool_data)
+{
+	return kmalloc(sizeof(struct flush_entry), gfp_mask);
+}
+
+static void flush_entry_free(void *element, void *pool_data)
+{
+	kfree(element);
+}
+
+/*
+ * cluster_ctr
+ *
+ * Common constructor for the core and disk variants.  Validates the
+ * "block_on_error" argument and the region size, then allocates and
+ * fills the log context.  Server communication is not implemented
+ * yet, so it currently always tears down and returns -ENOSYS.
+ */
+static int cluster_ctr(struct dirty_log *log, struct dm_target *ti,
+		       unsigned int argc, char **argv, int disk_log)
+{
+	int i;
+	int failure_response = FR_NONBLOCK;
+	struct log_c *lc = NULL;
+	uint32_t region_size;
+	region_t region_count;
+
+	/* Already checked argument count */
+
+	/* Check for block_on_error.  It must be present. */
+	for (i = 1; i < argc; i++) {
+		if (!strcmp(argv[i], "block_on_error"))
+			failure_response = FR_BLOCK;
+	}
+	if (failure_response != FR_BLOCK) {
+		DMWARN("Required \"block_on_error\" argument not supplied.");
+		return -EINVAL;
+	}
+
+	if (sscanf(argv[0], SECTOR_FORMAT, &region_size) != 1) {
+		DMWARN("Invalid region size string");
+		return -EINVAL;
+	}
+
+	region_count = dm_sector_div_up(ti->len, region_size);
+
+	lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+	if (!lc) {
+		DMWARN("Unable to allocate cluster log context.");
+		return -ENOMEM;
+	}
+	lc->ti = ti;
+	lc->region_size = region_size;
+	lc->region_count = region_count;
+	/* Record the validated failure response and initialize the
+	   flush machinery before mark/clear/flush can run. */
+	lc->failure_response = failure_response;
+	spin_lock_init(&lc->flush_lock);
+	INIT_LIST_HEAD(&lc->flush_list);
+
+	/* FIXME: Send table string to server */
+
+	/* Not functional yet - tear down and fail the constructor */
+	kfree(lc);
+
+	return -ENOSYS;
+}
+
+/*
+ * cluster_core_ctr
+ * @log
+ * @ti
+ * @argc
+ * @argv
+ *
+ * argv contains:
+ *   [[no]sync] "block_on_error"
+ *
+ * Returns: 0 on success, -XXX on failure
+ */
+static int cluster_core_ctr(struct dirty_log *log, struct dm_target *ti,
+			    unsigned int argc, char **argv)
+{
+	int i;
+	if ((argc < 3) || (argc > 4)) {
+		DMERR("Too %s arguments to clustered_core mirror log type.",
+		      (argc < 3) ? "few" : "many");
+		DMERR("  %d arguments supplied:", argc);
+		for (i = 0; i < argc; i++)
+			DMERR("    %s", argv[i]);
+		return -EINVAL;
+	}
+
+	return cluster_ctr(log, ti, argc, argv, 0);
+}
+
+
+/*
+ * cluster_disk_ctr
+ * @log
+ * @ti
+ * @argc
+ * @argv
+ *
+ * argv contains:
+ *   [[no]sync] "block_on_error"
+ *--------------------------------------------------------------*/
+static int cluster_disk_ctr(struct dirty_log *log, struct dm_target *ti,
+			    unsigned int argc, char **argv)
+{
+	int i;
+	if ((argc < 4) || (argc > 5)) {
+		DMERR("Too %s arguments to clustered_disk mirror log type.",
+		      (argc < 4) ? "few" : "many");
+		DMERR("  %d arguments supplied:", argc);
+		for (i = 0; i < argc; i++)
+			DMERR("    %s", argv[i]);
+		return -EINVAL;
+	}
+
+	return cluster_ctr(log, ti, argc, argv, 1);
+}
+
+static void cluster_dtr(struct dirty_log *log)
+{
+	struct log_c *lc = (struct log_c *)log->context;
+
+	/* FIXME: Send shutdown to server */
+	kfree(lc);
+
+	return;
+}
+
+static int cluster_presuspend(struct dirty_log *log)
+{
+	return -ENOSYS;
+}
+
+static int cluster_postsuspend(struct dirty_log *log)
+{
+	return -ENOSYS;
+}
+
+static int cluster_resume(struct dirty_log *log)
+{
+	return -ENOSYS;
+}
+
+/*
+ * cluster_get_region_size
+ * @log
+ *
+ * Only called during mirror construction, ok to block.
+ *
+ * Returns: region size (doesn't fail)
+ */
+static uint32_t cluster_get_region_size(struct dirty_log *log)
+{
+	struct log_c *lc = (struct log_c *)log->context;
+
+	return lc->region_size;
+}
+
+static int cluster_is_clean(struct dirty_log *log, region_t region)
+{
+	return 0; /* not clean for now */
+}
+
+static int cluster_is_remote_recovering(struct dirty_log *log, region_t region)
+{
+	return 1; /* yes for now */
+}
+
+/*
+ * cluster_in_sync
+ * @log
+ * @region
+ * @can_block: if set, return immediately
+ *
+ * Returns: 1 if in-sync, 0 if not-in-sync, < 0 on error
+ */
+static int cluster_in_sync(struct dirty_log *log, region_t region, int can_block)
+{
+	if (!can_block)
+		return -EWOULDBLOCK;
+
+	return 0; /* not in sync for now */
+}
+
+/*
+ * cluster_flush
+ * @log
+ *
+ * This function is ok to block.
+ * The flush happens in two stages.  First, it sends all
+ * clear/mark requests that are on the list.  Then it
+ * tells the server to commit them.  This gives the
+ * server a chance to optimise the commit to the cluster
+ * and/or disk, instead of doing it for every request.
+ *
+ * Additionally, we could implement another thread that
+ * sends the requests up to the server - reducing the
+ * load on flush.  Then the flush would have less in
+ * the list and be responsible for the finishing commit.
+ *
+ * Returns: 0 on success, < 0 on failure
+ */
+static int cluster_flush(struct dirty_log *log)
+{
+	int r = 0;
+	unsigned long flags;	/* spin_lock_irqsave requires unsigned long */
+	struct log_c *lc = (struct log_c *)log->context;
+	LIST_HEAD(flush_list);
+	struct flush_entry *fe, *tmp_fe;
+
+	/*
+	 * Steal the pending requests onto a local head.  A plain struct
+	 * assignment would leave the entries' prev/next still pointing
+	 * at lc->flush_list, so splice them instead.
+	 */
+	spin_lock_irqsave(&lc->flush_lock, flags);
+	list_splice_init(&lc->flush_list, &flush_list);
+	spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+	/*
+	 * FIXME: Count up requests, group request types,
+	 * allocate memory to stick all requests in and
+	 * send to server in one go.  Failing the allocation,
+	 * do it one by one.
+	 */
+
+	list_for_each_entry(fe, &flush_list, list) {
+		r = dm_clog_consult_server(lc->uuid, fe->type,
+					   (char *)&fe->region,
+					   sizeof(fe->region),
+					   NULL, 0);
+		if (r) {
+			/* Positive values are server-side errors */
+			r = (r > 0) ? -r : r;
+			goto fail;
+		}
+	}
+
+	r = dm_clog_consult_server(lc->uuid, DM_CLOG_FLUSH,
+				   NULL, 0, NULL, 0);
+	if (r)
+		r = (r > 0) ? -r : r;
+
+fail:
+	/* Entries are freed whether or not the flush succeeded */
+	list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
+		list_del(&fe->list);
+		mempool_free(fe, flush_entry_pool);
+	}
+
+	return r;
+}
+
+/*
+ * cluster_mark_region
+ * @log
+ * @region
+ *
+ * This function should avoid blocking unless absolutely required.
+ * (Memory allocation is valid for blocking.)
+ */
+static void cluster_mark_region(struct dirty_log *log, region_t region)
+{
+	unsigned long flags;
+	struct log_c *lc = (struct log_c *)log->context;
+	struct flush_entry *fe;
+
+	/* Wait for an allocation, but _never_ fail */
+	fe = mempool_alloc(flush_entry_pool, GFP_KERNEL);
+	BUG_ON(!fe);
+
+	spin_lock_irqsave(&lc->flush_lock, flags);
+	fe->type = DM_CLOG_MARK_REGION;
+	fe->region = region;
+	list_add(&fe->list, &lc->flush_list);
+	spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+	return;
+}
+
+/*
+ * cluster_clear_region
+ * @log
+ * @region
+ *
+ * This function must not block.
+ * So, the alloc can't block.  In the worst case, it is ok to
+ * fail.  It would simply mean we can't clear the region.
+ * Does nothing to current sync context, but does mean
+ * the region will be re-sync'ed on a reload of the mirror
+ * even though it is in-sync.
+ */
+static void cluster_clear_region(struct dirty_log *log, region_t region)
+{
+	unsigned long flags;
+	struct log_c *lc = (struct log_c *)log->context;
+	struct flush_entry *fe;
+
+	fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
+	if (!fe) {
+		DMERR("Failed to allocate memory to clear region.");
+		return;
+	}
+	spin_lock_irqsave(&lc->flush_lock, flags);
+	fe->type = DM_CLOG_CLEAR_REGION;
+	fe->region = region;
+	list_add(&fe->list, &lc->flush_list);
+	spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+	return;
+}
+
+static int cluster_get_resync_work(struct dirty_log *log, region_t *region)
+{
+	return -ENOSYS;
+}
+
+static void cluster_set_region_sync(struct dirty_log *log,
+				    region_t region, int in_sync)
+{
+	return;
+}
+
+static region_t cluster_get_sync_count(struct dirty_log *log)
+{
+	return 0;
+}
+
+static int cluster_status(struct dirty_log *log, status_type_t status_type,
+			  char *result, unsigned int maxlen)
+{
+	return -ENOSYS;
+}
+
+static int cluster_get_failure_response(struct dirty_log *log)
+{
+	struct log_c *lc = (struct log_c *)log->context;
+
+	return lc->failure_response;
+}
+
+static struct dirty_log_type _clustered_core_type = {
+	.name = "clustered_core",
+	.module = THIS_MODULE,
+	.ctr = cluster_core_ctr,
+	.dtr = cluster_dtr,
+	.presuspend = cluster_presuspend,
+	.postsuspend = cluster_postsuspend,
+	.resume = cluster_resume,
+	.get_region_size = cluster_get_region_size,
+	.is_clean = cluster_is_clean,
+	.is_remote_recovering = cluster_is_remote_recovering,
+	.in_sync = cluster_in_sync,
+	.flush = cluster_flush,
+	.mark_region = cluster_mark_region,
+	.clear_region = cluster_clear_region,
+	.get_resync_work = cluster_get_resync_work,
+	.set_region_sync = cluster_set_region_sync,
+	.get_sync_count = cluster_get_sync_count,
+	.status = cluster_status,
+	.get_failure_response = cluster_get_failure_response,
+};
+
+static struct dirty_log_type _clustered_disk_type = {
+	.name = "clustered_disk",
+	.module = THIS_MODULE,
+	.ctr = cluster_disk_ctr,
+	.dtr = cluster_dtr,
+	.presuspend = cluster_presuspend,
+	.postsuspend = cluster_postsuspend,
+	.resume = cluster_resume,
+	.get_region_size = cluster_get_region_size,
+	.is_clean = cluster_is_clean,
+	.is_remote_recovering = cluster_is_remote_recovering,
+	.in_sync = cluster_in_sync,
+	.flush = cluster_flush,
+	.mark_region = cluster_mark_region,
+	.clear_region = cluster_clear_region,
+	.get_resync_work = cluster_get_resync_work,
+	.set_region_sync = cluster_set_region_sync,
+	.get_sync_count = cluster_get_sync_count,
+	.status = cluster_status,
+	.get_failure_response = cluster_get_failure_response,
+};
+
+static int __init cluster_dirty_log_init(void)
+{
+	int r = 0;
+
+	flush_entry_pool = mempool_create(100, flush_entry_alloc,
+					  flush_entry_free, NULL);
+
+	if (!flush_entry_pool) {
+		DMERR("Unable to create flush_entry_pool:  No memory.");
+		return -ENOMEM;
+	}
+
+	r = dm_register_dirty_log_type(&_clustered_core_type);
+	if (r) {
+		DMWARN("Couldn't register clustered_core dirty log type");
+		mempool_destroy(flush_entry_pool);
+		return r;
+	}
+
+	r = dm_register_dirty_log_type(&_clustered_disk_type);
+	if (r) {
+		DMWARN("Couldn't register clustered_disk dirty log type");
+		dm_unregister_dirty_log_type(&_clustered_core_type);
+		mempool_destroy(flush_entry_pool);
+		return r;
+	}
+
+	return r;
+}
+
+static void __exit cluster_dirty_log_exit(void)
+{
+	dm_unregister_dirty_log_type(&_clustered_disk_type);
+	dm_unregister_dirty_log_type(&_clustered_core_type);
+	/* Release the pool created in init to avoid leaking on unload */
+	mempool_destroy(flush_entry_pool);
+	return;
+}