From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
Date: 22 Jul 2006 22:19:35 -0000
Message-ID: <20060722221935.25977.qmail@sourceware.org>
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: jbrassow at sourceware.org 2006-07-22 22:19:34
Modified files:
cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-common.h
dm-cmirror-server.c
Log message:
Fix for bug:
199862 - Suspending cluster mirrors can cause indefinite hang
And is part of a fix for:
199185 - 'lvconvert' fails to remove device-mapper devices ...
198555 - mirror log not getting cleared causes new mirror ...
And is likely to fix:
199334 - cmirror removal attempt hangs and caused locking ...
And will certainly help for:
199498
198821
194137
194125
199635
All of the above bugs will need to be reexamined when the packages
are rebuilt.
This fix allows the log server to migrate to other nodes during
suspension. This prevents the situation where the log server may
have its devices suspended when it receives a request. Trying to
fulfill a log request while devices are suspended leads to an
indefinite hang, because I/O will not complete until the devices
are unsuspended.
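(For context: a condensed sketch of the server-migration loop this
change adds to cluster_postsuspend(), simplified from the
dm-cmirror-client.c hunk below; it is illustrative, not the verbatim
patch. All identifiers come from the patch itself.)

	if (lc->server_id == my_id) {
		while (1) {
			/* announce that we are giving up the server role */
			consult_server(lc, 0, LRT_MASTER_LEAVING, NULL);
			down(&consult_server_lock);
			/*
			 * 0xDEAD as the initial candidate keeps this node
			 * from simply re-electing itself
			 */
			run_election(lc, 0xDEAD);
			up(&consult_server_lock);
			if (lc->server_id != my_id)
				break;	/* another node has taken over */
			/* still the server; back off briefly and retry */
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(HZ/4);
		}
	}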
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.21&r2=1.1.2.22
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-common.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.8&r2=1.1.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.13&r2=1.1.2.14
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2006/07/07 17:08:56 1.1.2.21
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2006/07/22 22:19:34 1.1.2.22
@@ -243,7 +243,7 @@
-static int run_election(struct log_c *lc){
+static int run_election(struct log_c *lc, uint32_t initial_server){
int error=0, len;
struct sockaddr_in saddr_in;
struct msghdr msg;
@@ -255,7 +255,7 @@
lr.lr_type = LRT_ELECTION;
lr.u.lr_starter = my_id;
- lr.u.lr_coordinator = my_id;
+ lr.u.lr_coordinator = initial_server;
memcpy(lr.lr_uuid, lc->uuid, MAX_NAME_LEN);
memset(&saddr_in, 0, sizeof(struct sockaddr_cl));
@@ -420,7 +420,6 @@
if(len <= 0){
/* ATTENTION -- what do we do with this ? */
-// DMWARN("Failed to recvmsg from clustered log server");
error = len;
*retry = 1;
goto fail;
@@ -435,16 +434,11 @@
}
if(lr->u.lr_int_rtn == -ENXIO){
- DMWARN("server tells us it no longer controls the log");
lc->server_id = 0xDEAD;
*retry = 1;
goto fail;
}
- if(lr->u.lr_int_rtn < 0){
- DMWARN("an error occured on the server while processing our request");
- }
-
if(result)
*result = lr->u.lr_region_rtn;
@@ -463,8 +457,9 @@
}
if(lr) kfree(lr);
-#ifdef DEBUG
- DMWARN("Request (%s) to server failed :: %d",
+#if 0
+ DMINFO("My (%u) request (%s) to server (%u) failed :: %d",
+ my_id,
(type == LRT_IS_CLEAN)? "LRT_IS_CLEAN":
(type == LRT_IN_SYNC)? "LRT_IN_SYNC":
(type == LRT_MARK_REGION)? "LRT_MARK_REGION":
@@ -475,7 +470,7 @@
(type == LRT_MASTER_LEAVING)? "LRT_MASTER_LEAVING":
(type == LRT_ELECTION)? "LRT_ELECTION":
(type == LRT_SELECTION)? "LRT_SELECTION": "UNKNOWN",
- error);
+ lc->server_id, error);
#endif
return error;
}
@@ -495,9 +490,13 @@
do{
retry = 0;
suspend_on(&suspend_client_queue, atomic_read(&suspend_client));
+ if ((type == LRT_MASTER_LEAVING) && (lc->server_id == 0xDEAD)) {
+ /* Nothing to do */
+ goto out;
+ }
election:
while(lc->server_id == 0xDEAD){
- run_election(lc);
+ run_election(lc, my_id);
new_server = 1;
}
@@ -539,7 +538,7 @@
spin_unlock(&region_state_lock);
goto out;
} else {
- DMWARN("Continuing request:: %s",
+ DMINFO("Continuing request:: %s",
(type == LRT_IS_CLEAN)? "LRT_IS_C LEAN":
(type == LRT_IN_SYNC)? "LRT_IN_SYNC":
(type == LRT_MARK_REGION)? "LRT_MARK_REGION":
@@ -737,14 +736,14 @@
struct log_c *lc = (struct log_c *) log->context;
if (!list_empty(&clear_region_list))
- DMERR("LEAVING WHILE REGION REQUESTS REMAIN.");
+ DMINFO("Leaving while clear region requests remain.");
list_del_init(&lc->log_list);
if(lc->server_id == my_id)
consult_server(lc, 0, LRT_MASTER_LEAVING, NULL);
sock_release(lc->client_sock);
- if (lc->log_dev)
+ if (lc->log_dev)
disk_dtr(log);
else
core_dtr(log);
@@ -755,15 +754,13 @@
static int cluster_presuspend(struct dirty_log *log)
{
- struct log_c *lc = (struct log_c *) log->context;
+ return 0;
+}
- /*
- atomic_set(&lc->suspended, 1);
+static int cluster_postsuspend(struct dirty_log *log)
+{
+ struct log_c *lc = (struct log_c *) log->context;
- if (lc->log_dev && lc->log_dev_failed)
- complete(&lc->failure_completion);
- else
- */
while (1) {
spin_lock(&region_state_lock);
if (list_empty(&clear_region_list)) {
@@ -775,20 +772,31 @@
/* Just an unnecessary call to clear out regions */
consult_server(lc, 0, LRT_IN_SYNC, NULL);
}
+ atomic_set(&lc->suspended, 1);
+ if(lc->server_id == my_id) {
+ while (1) {
+ consult_server(lc, 0, LRT_MASTER_LEAVING, NULL);
+ down(&consult_server_lock);
+ run_election(lc, 0xDEAD);
+ up(&consult_server_lock);
+ if (lc->server_id == my_id) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ/4);
+ } else {
+ break;
+ }
+ }
+ }
return 0;
}
-static int cluster_postsuspend(struct dirty_log *log){
- return 0;
-}
-
static int cluster_resume(struct dirty_log *log){
struct log_c *lc = (struct log_c *) log->context;
lc->sync_search = 0;
resume_server_requests();
- /* atomic_set(&lc->suspended, 0); */
+ atomic_set(&lc->suspended, 0);
return 0;
}
@@ -1310,7 +1318,7 @@
.get_failure_response = cluster_get_failure_response,
};
-#define CMIRROR_RELEASE_NAME "0.1.0"
+#define CMIRROR_RELEASE_NAME "0.2.0"
static int __init cluster_dirty_log_init(void)
{
int r = 0;
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-common.h 2006/06/29 19:48:01 1.1.2.8
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-common.h 2006/07/22 22:19:34 1.1.2.9
@@ -13,10 +13,12 @@
sector_t sector;
sector_t count;
};
+int dm_io_get(unsigned int num_pages);
+void dm_io_put(unsigned int num_pages);
int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
void *data, unsigned long *error_bits);
/* from dm.h */
-#define DM_NAME "device-mapper"
+#define DM_NAME "dm-cmirror"
#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x)
#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x)
#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x)
@@ -110,8 +112,8 @@
* Disk log fields
*/
int log_dev_failed;
- /*
atomic_t suspended;
+ /*
struct completion failure_completion;
*/
struct dm_dev *log_dev;
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2006/07/19 14:39:12 1.1.2.13
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2006/07/22 22:19:34 1.1.2.14
@@ -197,7 +197,7 @@
return 0;
return dm_io_sync_vm(1, &log->bits_location, WRITE,
- log->clean_bits, &ebits);
+ log->clean_bits, &ebits);
}
static int count_bits32(uint32_t *addr, unsigned size)
@@ -656,23 +656,41 @@
return -1;
}
-
- if((lr->lr_type == LRT_MASTER_LEAVING) &&
- (lr->u.lr_starter == my_id) &&
- lr->u.lr_node_count){
- lr->u.lr_coordinator = 0xDEAD;
- if(!(saddr->sin_addr.s_addr = nodeid_to_ipaddr(lr->u.lr_starter))){
- return -1;
+ if(lr->lr_type == LRT_MASTER_LEAVING) {
+ /*
+ * if we started this and (lr->u.lr_node_count != 0),
+ * then we have told everyone that we are leaving
+ */
+ if ((lr->u.lr_starter == my_id) && lr->u.lr_node_count){
+ lr->u.lr_coordinator = 0xDEAD;
+ if(!(saddr->sin_addr.s_addr = nodeid_to_ipaddr(lr->u.lr_starter))){
+ return -1;
+ }
+ saddr->sin_port = lr->u.lr_starter_port;
+ return 0;
}
- saddr->sin_port = lr->u.lr_starter_port;
- return 0;
}
-
+
+ /*
+ * Check if we have access to the log. We may not
+ * have loaded this device.
+ */
if(!lc){
lr->u.lr_node_count++;
return 0;
}
-
+
+ if(lr->lr_type == LRT_MASTER_LEAVING){
+ lc->server_id = 0xDEAD;
+ lr->u.lr_node_count++;
+ return 0;
+ }
+
+ /*
+ * New node joins and needs to know I am the server
+ * We shortcut the election here and respond directly
+ * to the inquirer
+ */
if(lc->server_id == my_id){
lr->u.lr_coordinator = my_id;
if(!(saddr->sin_addr.s_addr = nodeid_to_ipaddr(lr->u.lr_starter))){
@@ -681,16 +699,16 @@
saddr->sin_port = lr->u.lr_starter_port;
return 0;
}
-
-
- if(lr->lr_type == LRT_MASTER_LEAVING){
- lc->server_id = 0xDEAD;
- lr->u.lr_node_count++;
- return 0;
- }
-
+
if(lr->lr_type == LRT_ELECTION){
if((lr->u.lr_starter == my_id) && (lr->u.lr_node_count)){
+ /*
+ * We started this election, and we've been
+ * around the loop. If the node count hasn't
+ * changed since we started, we can proceed to
+ * selection. Otherwise, go again setting
+ * ourself as the leader to start.
+ */
if(node_count == lr->u.lr_node_count){
lr->lr_type = LRT_SELECTION;
} else {
@@ -700,9 +718,23 @@
return 0;
}
+ /*
+ * We are in the election phase, so
+ * if we have the lowest ID so far,
+ * we elect ourselves for server.
+ *
+ * However, if the mirror is being suspended
+ * (lc->suspended), then we leave the current
+ * coordinator in place.
+ *
+ * The client must not set lc->suspended until
+ * it has completed sending all requests. That
+ * way, everyone is done sending requests when
+ * the last server is stuck holding the ball.
+ */
lr->u.lr_node_count++;
- if(my_id < lr->u.lr_coordinator){
+ if((my_id < lr->u.lr_coordinator) && !atomic_read(&lc->suspended)){
lr->u.lr_coordinator = my_id;
}
return 0;
@@ -712,6 +744,13 @@
return 0;
}
+ /*
+ * Need to restart election if someone
+ * has joined since we started.
+ *
+ * Here, we are the starter, so set
+ * node_count = 1
+ */
if(lr->u.lr_node_count == node_count){
lr->lr_type = LRT_MASTER_ASSIGN;
} else {
@@ -721,12 +760,24 @@
}
lr->u.lr_node_count = 1;
} else if(lr->lr_type == LRT_MASTER_ASSIGN){
+ /*
+ * If we were elected coordinator, become the server
+ */
if(lr->u.lr_coordinator == my_id){
lc->server_id = my_id;
}
+
+ /*
+ * Continue around the loop
+ */
if(lr->u.lr_starter != my_id){
return 0;
}
+
+ /*
+ * If I was the one who asked for the election,
+ * then send the results back to the client
+ */
if(!(saddr->sin_addr.s_addr = nodeid_to_ipaddr(lr->u.lr_starter))){
return -1;
}
@@ -800,18 +851,15 @@
}
if(!lc){
- DMWARN("Log context can not be found for request");
lr.u.lr_int_rtn = -ENXIO;
goto reply;
}
-/*
- if(lc->server_id != my_id){
- DMWARN("I am not the server for this request");
- lr.u.lr_int_rtn = -ENXIO;
- goto reply;
- }
-*/
+ if (lc->server_id != my_id) {
+ lr.u.lr_int_rtn = -ENXIO;
+ goto reply;
+ }
+
switch(lr.lr_type){
case LRT_IS_CLEAN:
error = server_is_clean(lc, &lr);
@@ -860,12 +908,23 @@
}
/* ATTENTION -- if error? */
+/*
if(error){
- DMWARN("Error (%d) while processing request (type = %d)",
- error, lr.lr_type);
+ DMWARN("Error (%d) while processing request (%s)",
+ error,
+ (lr.lr_type == LRT_IS_CLEAN)? "LRT_IS_CLEAN":
+ (lr.lr_type == LRT_IN_SYNC)? "LRT_IN_SYNC":
+ (lr.lr_type == LRT_MARK_REGION)? "LRT_MARK_REGION":
+ (lr.lr_type == LRT_GET_RESYNC_WORK)? "LRT_GET_RESYNC_WORK":
+ (lr.lr_type == LRT_GET_SYNC_COUNT)? "LRT_GET_SYNC_COUNT":
+ (lr.lr_type == LRT_CLEAR_REGION)? "LRT_CLEAR_REGION":
+ (lr.lr_type == LRT_COMPLETE_RESYNC_WORK)? "LRT_COMPLETE_RESYNC_WORK":
+ (lr.lr_type == LRT_MASTER_LEAVING)? "LRT_MASTER_LEAVING":
+ (lr.lr_type == LRT_ELECTION)? "LRT_ELECTION":
+ (lr.lr_type == LRT_SELECTION)? "LRT_SELECTION": "UNKNOWN");
lr.u.lr_int_rtn = error;
}
-
+*/
reply:
/* Why do we need to reset this? */
@@ -940,9 +999,8 @@
** leaving node, it won't hurt anything - and**
** if there is, they will be recovered. */
case SERVICE_NODE_FAILED:
- DMINFO("A cluster mirror log member has %s",
- (restart_event_type == SERVICE_NODE_FAILED) ?
- "failed." : "left.");
+ if (restart_event_type == SERVICE_NODE_FAILED)
+ DMINFO("A cluster mirror log member has failed.");
list_for_each_entry(lc, &log_list_head, log_list){
if(lc->server_id == my_id){
@@ -968,8 +1026,6 @@
schedule();
}
- DMINFO("Cluster mirror log server is shutting down.");
-
sock_release(sock);
complete(&server_completion);
return 0;