From mboxrd@z Thu Jan 1 00:00:00 1970 From: jbrassow@sourceware.org Date: 23 Jan 2008 21:22:28 -0000 Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-clog.c Message-ID: <20080123212228.22744.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL5 Changes by: jbrassow at sourceware.org 2008-01-23 21:22:28 Modified files: cmirror-kernel/src: dm-clog.c Log message: - remember CTR string so if userspace server dies and restarts, we can pick up where we left off. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-clog.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2.2.6&r2=1.2.2.7 --- cluster/cmirror-kernel/src/dm-clog.c 2008/01/21 20:37:03 1.2.2.6 +++ cluster/cmirror-kernel/src/dm-clog.c 2008/01/23 21:22:28 1.2.2.7 @@ -20,6 +20,9 @@ region_t region_count; char uuid[DM_UUID_LEN]; + char *ctr_str; /* Gives ability to restart if userspace dies */ + uint32_t ctr_size; + spinlock_t flush_lock; struct list_head flush_list; /* only for clear and mark requests */ }; @@ -36,6 +39,44 @@ kfree(element); } +int cluster_do_request(struct log_c *lc, const char *uuid, int request_type, + char *data, int data_size, char *rdata, int *rdata_size) +{ + int r; + + /* + * If the server isn't there, -ESRCH is returned, + * and we must keep trying until the server is + * restored. + */ +retry: + r = dm_clog_consult_server(uuid, request_type, data, + data_size, rdata, rdata_size); + + if (r != -ESRCH) + return r; + + DMERR(" Userspace cluster log server not found."); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(2*HZ); + DMWARN("Attempting to contact cluster log server..."); + r = dm_clog_consult_server(uuid, DM_CLOG_CTR, lc->ctr_str, + lc->ctr_size, NULL, NULL); + if (!r) + break; + } + DMINFO("Reconnected to cluster log server... 
CTR complete"); + r = dm_clog_consult_server(uuid, DM_CLOG_RESUME, NULL, + 0, NULL, NULL); + if (!r) + goto retry; + + DMERR("Error trying to resume cluster log: %d", r); + + return -ESRCH; +} + static int cluster_ctr(struct dirty_log *log, struct dm_target *ti, unsigned int argc, char **argv, int disk_log) { @@ -96,12 +137,14 @@ if (r == -ESRCH) DMERR(" Userspace cluster log server not found"); - log->context = lc; - - if (r && lc) + if (r) { kfree(lc); - if (ctr_str) kfree(ctr_str); + } else { + lc->ctr_str = ctr_str; + lc->ctr_size = str_size; + log->context = lc; + } return r; } @@ -179,7 +222,7 @@ NULL, NULL); /* FIXME: What do we do on failure? */ - + kfree(lc->ctr_str); kfree(lc); return; @@ -266,9 +309,9 @@ struct log_c *lc = (struct log_c *)log->context; rdata_size = sizeof(is_clean); - r = dm_clog_consult_server(lc->uuid, DM_CLOG_IS_CLEAN, - (char *)&region, sizeof(region), - (char *)&is_clean, &rdata_size); + r = cluster_do_request(lc, lc->uuid, DM_CLOG_IS_CLEAN, + (char *)&region, sizeof(region), + (char *)&is_clean, &rdata_size); return (r) ? 0 : is_clean; } @@ -296,9 +339,9 @@ return -EWOULDBLOCK; rdata_size = sizeof(in_sync); - r = dm_clog_consult_server(lc->uuid, DM_CLOG_IN_SYNC, - (char *)&region, sizeof(region), - (char *)&in_sync, &rdata_size); + r = cluster_do_request(lc, lc->uuid, DM_CLOG_IN_SYNC, + (char *)&region, sizeof(region), + (char *)&in_sync, &rdata_size); return (r) ? 
0 : in_sync; } @@ -343,17 +386,17 @@ */ list_for_each_entry(fe, &flush_list, list) { - r = dm_clog_consult_server(lc->uuid, fe->type, - (char *)&fe->region, - sizeof(fe->region), - NULL, NULL); + r = cluster_do_request(lc, lc->uuid, fe->type, + (char *)&fe->region, + sizeof(fe->region), + NULL, NULL); if (r) goto fail; } do { - r = dm_clog_consult_server(lc->uuid, DM_CLOG_FLUSH, - NULL, 0, NULL, NULL); + r = cluster_do_request(lc, lc->uuid, DM_CLOG_FLUSH, + NULL, 0, NULL, NULL); if (r != -EAGAIN) break; @@ -460,9 +503,9 @@ struct { int i; region_t r; } pkg; rdata_size = sizeof(pkg); - r = dm_clog_consult_server(lc->uuid, DM_CLOG_GET_RESYNC_WORK, - NULL, 0, - (char *)&pkg, &rdata_size); + r = cluster_do_request(lc, lc->uuid, DM_CLOG_GET_RESYNC_WORK, + NULL, 0, + (char *)&pkg, &rdata_size); *region = pkg.r; return (r) ? r : pkg.i; @@ -487,9 +530,9 @@ pkg.r = region; pkg.i = in_sync; - r = dm_clog_consult_server(lc->uuid, DM_CLOG_SET_REGION_SYNC, - (char *)&pkg, sizeof(pkg), - NULL, NULL); + r = cluster_do_request(lc, lc->uuid, DM_CLOG_SET_REGION_SYNC, + (char *)&pkg, sizeof(pkg), + NULL, NULL); /* FIXME: It would be nice to be able to report failures */ return; @@ -512,9 +555,9 @@ struct log_c *lc = (struct log_c *)log->context; rdata_size = sizeof(sync_count); - r = dm_clog_consult_server(lc->uuid, DM_CLOG_GET_SYNC_COUNT, - NULL, 0, - (char *)&sync_count, &rdata_size); + r = cluster_do_request(lc, lc->uuid, DM_CLOG_GET_SYNC_COUNT, + NULL, 0, + (char *)&sync_count, &rdata_size); return (r) ? 
0 : sync_count; } @@ -537,18 +580,18 @@ switch(status_type) { case STATUSTYPE_INFO: - r = dm_clog_consult_server(lc->uuid, DM_CLOG_STATUS_INFO, - NULL, 0, - result, &sz); + r = cluster_do_request(lc, lc->uuid, DM_CLOG_STATUS_INFO, + NULL, 0, + result, &sz); /* * FIXME: If we fail to contact server, we should still * populate this with parsible results */ break; case STATUSTYPE_TABLE: - r = dm_clog_consult_server(lc->uuid, DM_CLOG_STATUS_TABLE, - NULL, 0, - result, &sz); + r = cluster_do_request(lc, lc->uuid, DM_CLOG_STATUS_TABLE, + NULL, 0, + result, &sz); break; } return (r) ? 0: sz; @@ -568,9 +611,9 @@ struct log_c *lc = (struct log_c *)log->context; rdata_size = sizeof(is_recovering); - r = dm_clog_consult_server(lc->uuid, DM_CLOG_IS_REMOTE_RECOVERING, - (char *)&region, sizeof(region), - (char *)&is_recovering, &rdata_size); + r = cluster_do_request(lc, lc->uuid, DM_CLOG_IS_REMOTE_RECOVERING, + (char *)&region, sizeof(region), + (char *)&is_recovering, &rdata_size); return (r) ? 1 : is_recovering; }