From mboxrd@z Thu Jan 1 00:00:00 1970 From: jbrassow@sourceware.org Date: 21 Jun 2006 01:41:46 -0000 Subject: [Cluster-devel] cluster cmirror-kernel/src/dm-clog-tfr.c cmirr ... Message-ID: <20060621014146.21156.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: jbrassow at sourceware.org 2006-06-21 01:41:43 Added files: cmirror-kernel/src: dm-clog-tfr.c dm-clog-tfr.h dm-clog.c Removed files: cmirror : README TODO cmirror/src : Makefile dm-cmirror-client.c dm-cmirror-cman.c dm-cmirror-cman.h dm-cmirror-common.h dm-cmirror-server.c dm-cmirror-server.h dm-cmirror-xfr.c dm-cmirror-xfr.h dm-log.h Log message: - This is the beginning of the cluster mirror log rewrite. The purpose is to work with the new CMAN/OpenAIS framework. The server moves to user-space. Will post description and RFC to cluster-devel. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/README.diff?cvsroot=cluster&r1=1.3&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/TODO.diff?cvsroot=cluster&r1=1.6&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/Makefile.diff?cvsroot=cluster&r1=1.2&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-client.c.diff?cvsroot=cluster&r1=1.11&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-cman.c.diff?cvsroot=cluster&r1=1.2&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-cman.h.diff?cvsroot=cluster&r1=1.1&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-common.h.diff?cvsroot=cluster&r1=1.6&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-server.c.diff?cvsroot=cluster&r1=1.8&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-server.h.diff?cvsroot=cluster&r1=1.2&r2=NONE 
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-xfr.c.diff?cvsroot=cluster&r1=1.2&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-cmirror-xfr.h.diff?cvsroot=cluster&r1=1.2&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/dm-log.h.diff?cvsroot=cluster&r1=1.3&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-clog-tfr.c.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-clog-tfr.h.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-clog.c.diff?cvsroot=cluster&r1=NONE&r2=1.1 /cvs/cluster/cluster/cmirror-kernel/src/dm-clog-tfr.c,v --> standard output revision 1.1 --- cluster/cmirror-kernel/src/dm-clog-tfr.c +++ - 2006-06-21 01:41:46.303327000 +0000 @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. + * + * This file is released under the LGPL. + */ + +#include "dm-clog-tfr.h" + +/* + * Pre-allocated nominal request area for speed + */ +#define DM_CLOG_NOMINAL_REQUEST_SIZE 512 +static char nominal_request[DM_CLOG_NOMINAL_REQUEST_SIZE]; + +static DECLARE_MUTEX(consult_server_lock); + +/* + * dm_clog_consult_server + * @uuid: log's uuid (must be MAX_NAME_LEN in size) + * @request_type: + * @data: data to tx to the server + * @data_size: size of data in bytes + * @rdata: place to put return data from server + * @rdata_size: value-result (amount of space given/amount of space used) + * + * Only one process at a time can communicate with the server. 
+ * Possible error return values: + * +XXX: Server-side error + * -XXX: Client-side error + * -ENOSPC: Not enough space in rdata + * -ENOMEM: Unable to allocate memory to complete request + * -ESRCH: Unable to contact server + * EIO: Server unable to commit request + * + * Returns: 0 on success, otherwise failure + */ +int dm_clog_consult_server(const char *uuid, int request_type, + char *data, int data_size, + char *rdata, int *rdata_size) +{ + int r = 0; + struct clog_tfr *tfr = (struct clog_tfr *)nominal_request; + + mutex_lock(&consult_server_lock); + if (data_size > (DM_CLOG_NOMINAL_REQUEST_SIZE - sizeof(*tfr))) + /* FIXME: is kmalloc sufficient if we need this much space? */ + tfr = kmalloc(data_size + sizeof(*tfr), GFP_KERNEL); + + if (!tfr) + return -ENOMEM; + + memcpy(tfr->uuid, uuid, MAX_NAME_LEN); + tfr->request_type = request_type; + tfr->data_size = data_size; + + /* + * FIXME: Send to server + */ + + if (rdata) { + /* FIXME: receive from server */ + if (tfr->error) { + r = tfr->error; + } else if (tfr->data_size > *rdata_size) { + r = -ENOSPC; + } else { + *rdata_size = tfr->data_size; + memcpy(rdata, tft->data, tfr->data_size); + } + /* FIXME: If using netlink, we may wish to ack back */ + } else { + /* + * FIXME: If we are using netlink, we may want an + * ack from the server to know that it got the + * request. (Ack is implicit if we are receiving + * data.) + */ + } + r = ENOSYS; + + mutex_unlock(&consult_server_lock); + return r; +} /cvs/cluster/cluster/cmirror-kernel/src/dm-clog-tfr.h,v --> standard output revision 1.1 --- cluster/cmirror-kernel/src/dm-clog-tfr.h +++ - 2006-06-21 01:41:46.388855000 +0000 @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. + * + * This file is released under the LGPL. 
+ */ + +#ifndef __DM_CLOG_TFR_H__ + +#define DM_CLOG_CTR 1 +#define DM_CLOG_DTR 2 +#define DM_CLOG_PRESUSPEND 3 +#define DM_CLOG_POSTSUSPEND 4 +#define DM_CLOG_RESUME 5 +#define DM_CLOG_GET_REGION_SIZE 6 +#define DM_CLOG_IS_CLEAN 7 +#define DM_CLOG_IS_REMOTE_RECOVERING 8 +#define DM_CLOG_IN_SYNC 9 +#define DM_CLOG_FLUSH 10 +#define DM_CLOG_MARK_REGION 11 +#define DM_CLOG_CLEAR_REGION 12 +#define DM_CLOG_GET_RESYNC_WORK 13 +#define DM_CLOG_SET_REGION_SYNC 14 +#define DM_CLOG_GET_SYNC_COUNT 15 +#define DM_CLOG_STATUS 16 +#define DM_CLOG_GET_FAILURE_RESPONSE 17 + +struct clog_tfr { + char uuid[MAX_NAME_LEN]; + int error; /* Used by server to inform of errors */ + int request_type; + int data_size; + char data[0]; +}; + + +int dm_clog_consult_server(const char *uuid, int request_type, + char *data, int data_size, + char *rdata, int *rdata_size); + +#endif /* __DM_CLOG_TFR_H__ */ /cvs/cluster/cluster/cmirror-kernel/src/dm-clog.c,v --> standard output revision 1.1 --- cluster/cmirror-kernel/src/dm-clog.c +++ - 2006-06-21 01:41:46.503450000 +0000 @@ -0,0 +1,436 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. + * + * This file is released under the LGPL. 
+ */
+
+#include "dm-clog-tfr.h"
+
+/* A queued mark/clear request, held on log_c.flush_list until the
+ * next flush pushes it to the server. */
+struct flush_entry {
+	int type;
+	region_t region;
+	struct list_head list;
+};
+
+/* Per-log context for the clustered dirty log. */
+struct log_c {
+	struct dm_target *ti;
+	uint32_t region_size;
+	region_t region_count;
+	int failure_response;
+	char uuid[MAX_NAME_LEN];
+
+	spinlock_t flush_lock;
+	struct list_head flush_list;  /* only for clear and mark requests */
+};
+
+static mempool_t *flush_entry_pool = NULL;
+
+static void *flush_entry_alloc(int gfp_mask, void *pool_data)
+{
+	return kmalloc(sizeof(struct flush_entry), gfp_mask);
+}
+
+static void flush_entry_free(void *element, void *pool_data)
+{
+	kfree(element);
+}
+
+/*
+ * cluster_ctr
+ *
+ * Common constructor for the core and disk variants.  Validates the
+ * "block_on_error" argument and the region size, then allocates and
+ * fills the log context.  Server communication is not implemented
+ * yet, so it currently always tears down and returns -ENOSYS.
+ */
+static int cluster_ctr(struct dirty_log *log, struct dm_target *ti,
+		       unsigned int argc, char **argv, int disk_log)
+{
+	int i;
+	int failure_response = FR_NONBLOCK;
+	struct log_c *lc = NULL;
+	uint32_t region_size;
+	region_t region_count;
+
+	/* Already checked argument count */
+
+	/* Check for block_on_error.  It must be present. */
+	for (i = 1; i < argc; i++) {
+		if (!strcmp(argv[i], "block_on_error"))
+			failure_response = FR_BLOCK;
+	}
+	if (failure_response != FR_BLOCK) {
+		DMWARN("Required \"block_on_error\" argument not supplied.");
+		return -EINVAL;
+	}
+
+	if (sscanf(argv[0], SECTOR_FORMAT, &region_size) != 1) {
+		DMWARN("Invalid region size string");
+		return -EINVAL;
+	}
+
+	region_count = dm_sector_div_up(ti->len, region_size);
+
+	lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+	if (!lc) {
+		DMWARN("Unable to allocate cluster log context.");
+		return -ENOMEM;
+	}
+	lc->ti = ti;
+	lc->region_size = region_size;
+	lc->region_count = region_count;
+	/* Record the validated failure response and initialize the
+	   flush machinery before mark/clear/flush can run. */
+	lc->failure_response = failure_response;
+	spin_lock_init(&lc->flush_lock);
+	INIT_LIST_HEAD(&lc->flush_list);
+
+	/* FIXME: Send table string to server */
+
+	/* Not functional yet - tear down and fail the constructor */
+	kfree(lc);
+
+	return -ENOSYS;
+}
+
+/*
+ * cluster_core_ctr
+ * @log
+ * @ti
+ * @argc
+ * @argv
+ *
+ * argv contains:
+ *   [[no]sync] "block_on_error"
+ *
+ * Returns: 0 on success, -XXX on failure
+ */
+static int cluster_core_ctr(struct dirty_log *log, struct dm_target *ti,
+			    unsigned int argc, char **argv)
+{
+	int i;
+	if ((argc < 3) || (argc > 4)) {
+		DMERR("Too %s arguments to clustered_core mirror log type.",
+		      (argc < 3) ? "few" : "many");
+		DMERR("  %d arguments supplied:", argc);
+		for (i = 0; i < argc; i++)
+			DMERR("    %s", argv[i]);
+		return -EINVAL;
+	}
+
+	return cluster_ctr(log, ti, argc, argv, 0);
+}
+
+
+/*
+ * cluster_disk_ctr
+ * @log
+ * @ti
+ * @argc
+ * @argv
+ *
+ * argv contains:
+ *   [[no]sync] "block_on_error"
+ *--------------------------------------------------------------*/
+static int cluster_disk_ctr(struct dirty_log *log, struct dm_target *ti,
+			    unsigned int argc, char **argv)
+{
+	int i;
+	if ((argc < 4) || (argc > 5)) {
+		DMERR("Too %s arguments to clustered_disk mirror log type.",
+		      (argc < 4) ? "few" : "many");
+		DMERR("  %d arguments supplied:", argc);
+		for (i = 0; i < argc; i++)
+			DMERR("    %s", argv[i]);
+		return -EINVAL;
+	}
+
+	return cluster_ctr(log, ti, argc, argv, 1);
+}
+
+static void cluster_dtr(struct dirty_log *log)
+{
+	struct log_c *lc = (struct log_c *)log->context;
+
+	/* FIXME: Send shutdown to server */
+	kfree(lc);
+
+	return;
+}
+
+static int cluster_presuspend(struct dirty_log *log)
+{
+	return -ENOSYS;
+}
+
+static int cluster_postsuspend(struct dirty_log *log)
+{
+	return -ENOSYS;
+}
+
+static int cluster_resume(struct dirty_log *log)
+{
+	return -ENOSYS;
+}
+
+/*
+ * cluster_get_region_size
+ * @log
+ *
+ * Only called during mirror construction, ok to block.
+ *
+ * Returns: region size (doesn't fail)
+ */
+static uint32_t cluster_get_region_size(struct dirty_log *log)
+{
+	struct log_c *lc = (struct log_c *)log->context;
+
+	return lc->region_size;
+}
+
+static int cluster_is_clean(struct dirty_log *log, region_t region)
+{
+	return 0; /* not clean for now */
+}
+
+static int cluster_is_remote_recovering(struct dirty_log *log, region_t region)
+{
+	return 1; /* yes for now */
+}
+
+/*
+ * cluster_in_sync
+ * @log
+ * @region
+ * @can_block: if set, return immediately
+ *
+ * Returns: 1 if in-sync, 0 if not-in-sync, < 0 on error
+ */
+static int cluster_in_sync(struct dirty_log *log, region_t region, int can_block)
+{
+	if (!can_block)
+		return -EWOULDBLOCK;
+
+	return 0; /* not in sync for now */
+}
+
+/*
+ * cluster_flush
+ * @log
+ *
+ * This function is ok to block.
+ * The flush happens in two stages.  First, it sends all
+ * clear/mark requests that are on the list.  Then it
+ * tells the server to commit them.  This gives the
+ * server a chance to optimise the commit to the cluster
+ * and/or disk, instead of doing it for every request.
+ *
+ * Additionally, we could implement another thread that
+ * sends the requests up to the server - reducing the
+ * load on flush.  Then the flush would have less in
+ * the list and be responsible for the finishing commit.
+ *
+ * Returns: 0 on success, < 0 on failure
+ */
+static int cluster_flush(struct dirty_log *log)
+{
+	int r = 0;
+	unsigned long flags;	/* spin_lock_irqsave requires unsigned long */
+	struct log_c *lc = (struct log_c *)log->context;
+	LIST_HEAD(flush_list);
+	struct flush_entry *fe, *tmp_fe;
+
+	/*
+	 * Steal the pending requests onto a local head.  A plain struct
+	 * assignment would leave the entries' prev/next still pointing
+	 * at lc->flush_list, so splice them instead.
+	 */
+	spin_lock_irqsave(&lc->flush_lock, flags);
+	list_splice_init(&lc->flush_list, &flush_list);
+	spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+	/*
+	 * FIXME: Count up requests, group request types,
+	 * allocate memory to stick all requests in and
+	 * send to server in one go.  Failing the allocation,
+	 * do it one by one.
+	 */
+
+	list_for_each_entry(fe, &flush_list, list) {
+		r = dm_clog_consult_server(lc->uuid, fe->type,
+					   (char *)&fe->region,
+					   sizeof(fe->region),
+					   NULL, 0);
+		if (r) {
+			/* Positive values are server-side errors */
+			r = (r > 0) ? -r : r;
+			goto fail;
+		}
+	}
+
+	r = dm_clog_consult_server(lc->uuid, DM_CLOG_FLUSH,
+				   NULL, 0, NULL, 0);
+	if (r)
+		r = (r > 0) ? -r : r;
+
+fail:
+	/* Entries are freed whether or not the flush succeeded */
+	list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
+		list_del(&fe->list);
+		mempool_free(fe, flush_entry_pool);
+	}
+
+	return r;
+}
+
+/*
+ * cluster_mark_region
+ * @log
+ * @region
+ *
+ * This function should avoid blocking unless absolutely required.
+ * (Memory allocation is valid for blocking.)
+ */
+static void cluster_mark_region(struct dirty_log *log, region_t region)
+{
+	unsigned long flags;
+	struct log_c *lc = (struct log_c *)log->context;
+	struct flush_entry *fe;
+
+	/* Wait for an allocation, but _never_ fail */
+	fe = mempool_alloc(flush_entry_pool, GFP_KERNEL);
+	BUG_ON(!fe);
+
+	spin_lock_irqsave(&lc->flush_lock, flags);
+	fe->type = DM_CLOG_MARK_REGION;
+	fe->region = region;
+	list_add(&fe->list, &lc->flush_list);
+	spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+	return;
+}
+
+/*
+ * cluster_clear_region
+ * @log
+ * @region
+ *
+ * This function must not block.
+ * So, the alloc can't block.  In the worst case, it is ok to
+ * fail.  It would simply mean we can't clear the region.
+ * Does nothing to current sync context, but does mean
+ * the region will be re-sync'ed on a reload of the mirror
+ * even though it is in-sync.
+ */
+static void cluster_clear_region(struct dirty_log *log, region_t region)
+{
+	unsigned long flags;
+	struct log_c *lc = (struct log_c *)log->context;
+	struct flush_entry *fe;
+
+	fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
+	if (!fe) {
+		DMERR("Failed to allocate memory to clear region.");
+		return;
+	}
+	spin_lock_irqsave(&lc->flush_lock, flags);
+	fe->type = DM_CLOG_CLEAR_REGION;
+	fe->region = region;
+	list_add(&fe->list, &lc->flush_list);
+	spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+	return;
+}
+
+static int cluster_get_resync_work(struct dirty_log *log, region_t *region)
+{
+	return -ENOSYS;
+}
+
+static void cluster_set_region_sync(struct dirty_log *log,
+				    region_t region, int in_sync)
+{
+	return;
+}
+
+static region_t cluster_get_sync_count(struct dirty_log *log)
+{
+	return 0;
+}
+
+static int cluster_status(struct dirty_log *log, status_type_t status_type,
+			  char *result, unsigned int maxlen)
+{
+	return -ENOSYS;
+}
+
+static int cluster_get_failure_response(struct dirty_log *log)
+{
+	struct log_c *lc = (struct log_c *)log->context;
+
+	return lc->failure_response;
+}
+
+static struct dirty_log_type _clustered_core_type = {
+	.name = "clustered_core",
+	.module = THIS_MODULE,
+	.ctr = cluster_core_ctr,
+	.dtr = cluster_dtr,
+	.presuspend = cluster_presuspend,
+	.postsuspend = cluster_postsuspend,
+	.resume = cluster_resume,
+	.get_region_size = cluster_get_region_size,
+	.is_clean = cluster_is_clean,
+	.is_remote_recovering = cluster_is_remote_recovering,
+	.in_sync = cluster_in_sync,
+	.flush = cluster_flush,
+	.mark_region = cluster_mark_region,
+	.clear_region = cluster_clear_region,
+	.get_resync_work = cluster_get_resync_work,
+	.set_region_sync = cluster_set_region_sync,
+	.get_sync_count = cluster_get_sync_count,
+	.status = cluster_status,
+	.get_failure_response = cluster_get_failure_response,
+};
+
+static struct dirty_log_type _clustered_disk_type = {
+	.name = "clustered_disk",
+	.module = THIS_MODULE,
+	.ctr = cluster_disk_ctr,
+	.dtr = cluster_dtr,
+	.presuspend = cluster_presuspend,
+	.postsuspend = cluster_postsuspend,
+	.resume = cluster_resume,
+	.get_region_size = cluster_get_region_size,
+	.is_clean = cluster_is_clean,
+	.is_remote_recovering = cluster_is_remote_recovering,
+	.in_sync = cluster_in_sync,
+	.flush = cluster_flush,
+	.mark_region = cluster_mark_region,
+	.clear_region = cluster_clear_region,
+	.get_resync_work = cluster_get_resync_work,
+	.set_region_sync = cluster_set_region_sync,
+	.get_sync_count = cluster_get_sync_count,
+	.status = cluster_status,
+	.get_failure_response = cluster_get_failure_response,
+};
+
+static int __init cluster_dirty_log_init(void)
+{
+	int r = 0;
+
+	flush_entry_pool = mempool_create(100, flush_entry_alloc,
+					  flush_entry_free, NULL);
+
+	if (!flush_entry_pool) {
+		DMERR("Unable to create flush_entry_pool:  No memory.");
+		return -ENOMEM;
+	}
+
+	r = dm_register_dirty_log_type(&_clustered_core_type);
+	if (r) {
+		DMWARN("Couldn't register clustered_core dirty log type");
+		mempool_destroy(flush_entry_pool);
+		return r;
+	}
+
+	r = dm_register_dirty_log_type(&_clustered_disk_type);
+	if (r) {
+		DMWARN("Couldn't register clustered_disk dirty log type");
+		dm_unregister_dirty_log_type(&_clustered_core_type);
+		mempool_destroy(flush_entry_pool);
+		return r;
+	}
+
+	return r;
+}
+
+static void __exit cluster_dirty_log_exit(void)
+{
+	dm_unregister_dirty_log_type(&_clustered_disk_type);
+	dm_unregister_dirty_log_type(&_clustered_core_type);
+	/* Release the pool created in init to avoid leaking on unload */
+	mempool_destroy(flush_entry_pool);
+	return;
+}