* [Cluster-devel] cluster/cmirror/src cluster.c functions.c func ...
@ 2007-11-05 22:44 jbrassow
0 siblings, 0 replies; 2+ messages in thread
From: jbrassow @ 2007-11-05 22:44 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: jbrassow at sourceware.org 2007-11-05 22:44:04
Modified files:
cmirror/src : cluster.c functions.c functions.h link_mon.c
local.c
Log message:
- Fix problem with recovery work assignment (still need to add
priority recovery... otherwise, I/O will stall for long periods
during mirror resync)
- Clean-up checkpointing code and fix a couple bugs there that
prevented proper start-up.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/cluster.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/functions.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.2&r2=1.1.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/functions.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/link_mon.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/local.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.2&r2=1.1.2.3
--- cluster/cmirror/src/Attic/cluster.c 2007/11/03 18:53:03 1.1.2.3
+++ cluster/cmirror/src/Attic/cluster.c 2007/11/05 22:44:03 1.1.2.4
@@ -49,6 +49,8 @@
int valid;
struct queue *startup_queue;
+ int checkpoints_needed;
+ uint32_t checkpoint_requesters[10];
struct checkpoint_data *checkpoint_list;
};
@@ -384,7 +386,7 @@
return 0;
}
-static int import_checkpoint(struct clog_cpg *entry)
+static int import_checkpoint(struct clog_cpg *entry, int no_read)
{
int rtn = 0;
SaCkptCheckpointHandleT h;
@@ -422,6 +424,11 @@
saCkptCheckpointUnlink(ckpt_handle, &name);
+ if (no_read) {
+ LOG_DBG("Checkpoint for this log already recieved");
+ goto no_read;
+ }
+
init_retry:
rv = saCkptSectionIterationInitialize(h, SA_CKPT_SECTIONS_ANY, 0, &itr);
if (rv == SA_AIS_ERR_TRY_AGAIN) {
@@ -516,7 +523,7 @@
fail:
saCkptSectionIterationFinalize(itr);
-
+no_read:
saCkptCheckpointClose(h);
free(bitmap);
@@ -542,11 +549,18 @@
* notice in tfr in export_checkpoint function
* by setting tfr->error
*/
- export_checkpoint(cp);
-
- entry->checkpoint_list = cp->next;
- free_checkpoint(cp);
- cp = entry->checkpoint_list;
+ switch (export_checkpoint(cp)) {
+ case -EEXIST:
+ LOG_DBG("Checkpoint already handled by someone else");
+ case 0:
+ entry->checkpoint_list = cp->next;
+ free_checkpoint(cp);
+ cp = entry->checkpoint_list;
+ break;
+ default:
+ /* FIXME: Skipping will cause list corruption */
+ LOG_ERROR("Failed to export checkpoint");
+ }
}
}
EXIT();
@@ -556,7 +570,9 @@
static void cpg_message_callback(cpg_handle_t handle, struct cpg_name *gname,
uint32_t nodeid, uint32_t pid, void *msg, int msg_len)
{
+ int i;
int r = 0;
+ int i_am_server;
struct clog_tfr *tfr = msg;
struct clog_tfr *startup_tfr = NULL;
struct clog_tfr *cp_tfr = NULL;
@@ -567,9 +583,6 @@
if (msg_len != (sizeof(*tfr) + tfr->data_size))
LOG_ERROR("Badly sized message recieved from cluster.");
- LOG_DBG("Message (len = %d) from node/pid %u/%d", msg_len,
- nodeid, pid);
-
if (tfr->request_type & DM_CLOG_RESPONSE)
LOG_DBG("Response from cluster recieved %s",
RQ_TYPE(tfr->request_type & ~DM_CLOG_RESPONSE));
@@ -587,13 +600,31 @@
LOG_ERROR("Unable to find clog_cpg for cluster message");
goto out;
}
+ i_am_server = (my_cluster_id == match->lowest_id) ? 1 : 0;
if (tfr->request_type == DM_CLOG_CHECKPOINT_READY) {
- if ((!match->valid) && (my_cluster_id == tfr->originator)) {
- switch (import_checkpoint(match)) {
+ /* Redundant checkpoints ignored due to match->valid */
+ if (my_cluster_id == tfr->originator) {
+ switch (import_checkpoint(match, match->valid)) {
case 0:
- LOG_DBG("Checkpoint data recieved. Log is now valid");
- match->valid = 1;
+ if (!match->valid) {
+ LOG_DBG("Checkpoint data recieved. Log is now valid");
+ match->valid = 1;
+ while ((startup_tfr = queue_remove(match->startup_queue))) {
+ LOG_DBG("Processing delayed request %d: %s",
+ match->startup_queue->count,
+ RQ_TYPE(startup_tfr->request_type));
+ r = handle_cluster_request(startup_tfr, i_am_server);
+
+ if (r) {
+ LOG_ERROR("Error while processing delayed CPG message");
+ goto out;
+ } else {
+ queue_add(startup_tfr, free_queue);
+ }
+ }
+ }
+
break;
case -EAGAIN:
LOG_PRINT("Checkpoint data empty. Requesting new checkpoint.");
@@ -628,28 +659,28 @@
goto out;
}
- if (tfr->request_type == DM_CLOG_CHECKPOINT_REQUEST) {
- if (tfr->originator == my_cluster_id) {
- /*
- * The checkpoint includes any request up to the
- * request for checkpoint. So, we must clear any
- * previous requests we were storing.
- */
- while ((startup_tfr = queue_remove(match->startup_queue)))
- queue_add(startup_tfr, free_queue);
- } else if (my_cluster_id == match->lowest_id) {
- struct checkpoint_data *new;
-
- new = prepare_checkpoint(match, tfr->originator);
- if (!new) {
- /* FIXME: Need better error handling */
- LOG_ERROR("Failed to prepare checkpoint!!!");
- goto out;
- }
- new->next = match->checkpoint_list;
- match->checkpoint_list = new;
+ /*
+ * If the log is now valid, we can queue the checkpoints
+ */
+ for (i = match->checkpoints_needed; i;) {
+ struct checkpoint_data *new;
+
+ i--;
+ if (log_get_state(tfr) != LOG_RESUMED) {
+ LOG_DBG("Skipping checkpoint for %u, my log is not ready",
+ match->checkpoint_requesters[i]);
+ continue;
}
- goto out;
+ new = prepare_checkpoint(match, match->checkpoint_requesters[i]);
+ if (!new) {
+ /* FIXME: Need better error handling */
+ LOG_ERROR("Failed to prepare checkpoint for %u!!!",
+ match->checkpoint_requesters[i]);
+ break;
+ }
+ match->checkpoints_needed = i;
+ new->next = match->checkpoint_list;
+ match->checkpoint_list = new;
}
if (tfr->request_type & DM_CLOG_RESPONSE)
@@ -665,21 +696,7 @@
goto out;
}
- while ((startup_tfr = queue_remove(match->startup_queue))) {
- LOG_DBG("Processing delayed request %d: %s",
- match->startup_queue->count,
- RQ_TYPE(startup_tfr->request_type));
- r = handle_cluster_request(startup_tfr,
- (my_cluster_id == match->lowest_id) ? 1 : 0);
- if (r) {
- LOG_ERROR("Error while processing delayed CPG message");
- goto out;
- } else {
- queue_add(startup_tfr, free_queue);
- }
- }
-
- r = handle_cluster_request(tfr, (my_cluster_id == match->lowest_id) ? 1 : 0);
+ r = handle_cluster_request(tfr, i_am_server);
}
out:
@@ -694,16 +711,14 @@
struct cpg_address *left_list, int left_list_entries,
struct cpg_address *joined_list, int joined_list_entries)
{
- int i;
+ int i, j;
int my_pid = getpid();
int found = 0;
- int i_was_server = 0;
- int do_checkpoint = 0;
struct clog_cpg *match, *tmp;
ENTER();
- LOG_PRINT("* CPG config callback *********************");
+ LOG_PRINT("****** CPG config callback ****************");
LOG_PRINT("* JOINING (%d):", joined_list_entries);
for (i = 0; i < joined_list_entries; i++)
@@ -751,6 +766,7 @@
goto out;
}
+ /* Assign my_cluster_id */
if (my_cluster_id == 0xDEAD) {
for (i = 0; i < joined_list_entries; i++) {
LOG_DBG("My pid = %d\t\t[%u/%d]", my_pid,
@@ -764,42 +780,30 @@
LOG_PRINT("Setting my cluster id: %u", my_cluster_id);
}
}
- } else if (match->lowest_id == my_cluster_id) {
- LOG_DBG("I was the server (lowest_id = %u, my_cluster_id = %u)",
- match->lowest_id, my_cluster_id);
- i_was_server = 1;
+ goto out;
}
+ /* Find the lowest_id, i.e. the server */
for (i = 0, match->lowest_id = member_list[0].nodeid;
i < member_list_entries; i++)
if (match->lowest_id > member_list[i].nodeid)
match->lowest_id = member_list[i].nodeid;
- if (match->lowest_id == my_cluster_id) {
- /* I am the server now */
- if (!i_was_server)
- LOG_PRINT("I am the new log server for %s", match->name.value);
- else if (joined_list_entries) {
- LOG_PRINT("I must send checkpoint data.");
- do_checkpoint = 1;
- }
- } else if (i_was_server && joined_list_entries) {
- LOG_PRINT("Giving server ownership to %u", match->lowest_id);
- LOG_PRINT("I must send checkpoint data.");
- do_checkpoint = 1;
- }
+ LOG_PRINT("Server is now %u", match->lowest_id);
- if (do_checkpoint) {
- struct checkpoint_data *new;
+ /*
+ * If I am part of the joining list, I do not send checkpoints
+ * FIXME: What are the cases where multiple nodes can join?
+ */
+ for (i = 0; i < joined_list_entries; i++)
+ if (joined_list[i].nodeid == my_cluster_id)
+ goto out;
- for (i = 0; i < joined_list_entries; i++) {
- new = prepare_checkpoint(match, joined_list[i].nodeid);
- if (!new)
- goto out;
- new->next = match->checkpoint_list;
- match->checkpoint_list = new;
- }
+ for (i = 0, j = match->checkpoints_needed; i < joined_list_entries; i++) {
+ LOG_DBG("Joining node, %u needs checkpoint", joined_list[i].nodeid);
+ match->checkpoint_requesters[i + j] = joined_list[i].nodeid;
}
+ match->checkpoints_needed += i;
out:
EXIT();
--- cluster/cmirror/src/Attic/functions.c 2007/11/03 18:37:48 1.1.2.2
+++ cluster/cmirror/src/Attic/functions.c 2007/11/05 22:44:03 1.1.2.3
@@ -56,6 +56,8 @@
FORCESYNC, /* Force a sync to happen */
} sync;
+ uint32_t state; /* current operational state of the log */
+
int disk_fd; /* -1 means no disk log */
int log_dev_failed;
uint64_t disk_nr_regions;
@@ -563,6 +565,8 @@
if (lc->touched)
LOG_DBG("WARNING: log still marked as 'touched' during suspend");
+ lc->state = LOG_SUSPENDED;
+
return 0;
}
@@ -583,12 +587,12 @@
}
/*
- * _clog_resume
+ * clog_resume
* @tfr
*
* Does the main work of resuming.
*/
-static int _clog_resume(struct clog_tfr *tfr)
+static int clog_resume(struct clog_tfr *tfr)
{
uint32_t i;
struct log_c *lc = get_log(tfr->uuid);
@@ -617,6 +621,9 @@
case 3:
LOG_DBG("Non-master resume: bits pre-loaded");
lc->resume_override = 1000;
+ lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count);
+ LOG_DBG("sync_count = %llu", lc->sync_count);
+ goto out;
return 0;
default:
LOG_ERROR("Error:: multiple loading of bits (%d)", lc->resume_override);
@@ -662,36 +669,29 @@
/* copy clean across to sync */
memcpy(lc->sync_bits, lc->clean_bits, size);
lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count);
+ LOG_DBG("sync_count = %llu", lc->sync_count);
lc->sync_search = 0;
/*
- tfr->error = write_log(lc);
- if (tfr->error) {
- lc->log_dev_failed = 1;
- LOG_ERROR("Failed to write initial disk log");
- } else
- lc->log_dev_failed = 0;
- */
- /*
* We mark 'touched' so that only the master commits
* the log via 'commit_log'
*/
lc->touched = 1;
-
+out:
+ lc->state = LOG_RESUMED;
+
return tfr->error;
}
/*
- * clog_resume
+ * local_resume
* @tfr
*
* If the log is pending, we must first join the cpg and
* put the log in the official list.
*
- * If the log is in the official list, then we call
- * _clog_resume.
*/
-static int clog_resume(struct clog_tfr *tfr)
+int local_resume(struct clog_tfr *tfr)
{
int r;
struct log_c *lc = get_log(tfr->uuid);
@@ -714,12 +714,9 @@
/* move log to official list */
list_del_init(&lc->list);
list_add(&lc->list, &log_list);
-
- return 0;
}
- /* log is in the official list, try to resume */
- return _clog_resume(tfr);
+ return 0;
}
/*
@@ -896,7 +893,6 @@
*/
static int clog_get_resync_work(struct clog_tfr *tfr)
{
- int sync_search, conflict=0;
struct {int i; uint64_t r; } *pkg = (void *)tfr->data;
struct log_c *lc = get_log(tfr->uuid);
@@ -905,42 +901,35 @@
tfr->data_size = sizeof(*pkg);
- /*
- * Check if we are done recovering, or if there
- * is already someone recovering.
- */
- if ((lc->sync_search >= lc->region_count) ||
- (lc->recovering_region != (uint64_t)-1)) {
+ if (lc->sync_search >= lc->region_count) {
+ /*
+ * FIXME: handle intermittent errors during recovery
+ * by resetting sync_search... but not to many times.
+ */
+ LOG_DBG(" Recovery has finished");
pkg->i = 0;
return 0;
}
- for (sync_search = lc->sync_search;
- sync_search < lc->region_count;
- sync_search += (pkg->r + 1)) {
- pkg->r = find_next_zero_bit(lc->sync_bits,
- lc->region_count,
- sync_search);
-
- /*
- * If the region is currently marked, we cannot
- * recover it yet.
- */
- if (!log_test_bit(lc->clean_bits, pkg->r))
- conflict = 1;
- else
- break;
+ if (lc->recovering_region != (uint64_t)-1) {
+ LOG_DBG("Someone is already recovering region %Lu",
+ lc->recovering_region);
+ pkg->i = 0;
+ return 0;
}
- if (!conflict)
- lc->sync_search = pkg->r + 1;
+ pkg->r = find_next_zero_bit(lc->sync_bits,
+ lc->region_count,
+ lc->sync_search);
if (pkg->r >= lc->region_count) {
pkg->i = 0;
return 0;
}
- LOG_DBG("Assigning resync work: region = %llu\n", pkg->r);
+ lc->sync_search = pkg->r + 1;
+
+ LOG_DBG(" Assigning resync work: region = %llu\n", pkg->r);
pkg->i = 1;
return 0;
}
@@ -984,6 +973,8 @@
return -EINVAL;
*sync_count = lc->sync_count;
+ LOG_DBG("sync_count = %llu", *sync_count);
+
tfr->data_size = sizeof(*sync_count);
return 0;
@@ -1347,6 +1338,17 @@
return 0;
}
+int log_get_state(struct clog_tfr *tfr)
+{
+ struct log_c *lc;
+
+ lc = get_log(tfr->uuid);
+ if (!lc)
+ return -EINVAL;
+
+ return lc->state;
+}
+
int log_status(void)
{
int found = 0;
@@ -1380,3 +1382,4 @@
}
return found;
}
+
--- cluster/cmirror/src/Attic/functions.h 2007/08/23 19:57:31 1.1.2.1
+++ cluster/cmirror/src/Attic/functions.h 2007/11/05 22:44:03 1.1.2.2
@@ -3,9 +3,14 @@
#include <linux/dm-clog-tfr.h>
+#define LOG_RESUMED 1
+#define LOG_SUSPENDED 2
+
+int local_resume(struct clog_tfr *tfr);
int do_request(struct clog_tfr *tfr);
int commit_log(struct clog_tfr *tfr);
int store_bits(const char *uuid, const char *which, char **buf);
int load_bits(const char *uuid, const char *which, char *buf, int size);
+int log_get_state(struct clog_tfr *tfr);
int log_status(void);
#endif /* __CLOG_FUNCTIONS_DOT_H__ */
--- cluster/cmirror/src/Attic/link_mon.c 2007/08/23 19:57:31 1.1.2.1
+++ cluster/cmirror/src/Attic/link_mon.c 2007/11/05 22:44:03 1.1.2.2
@@ -102,8 +102,6 @@
/* FIXME: handle POLLHUP */
for (i = 0; i < used_pfds; i++)
if (pfds[i].revents & POLLIN) {
- LOG_DBG("Data waiting on file descriptor, %d.",
- pfds[i].fd);
/* FIXME: Add this back return 1;*/
r++;
}
--- cluster/cmirror/src/Attic/local.c 2007/11/03 18:37:48 1.1.2.2
+++ cluster/cmirror/src/Attic/local.c 2007/11/05 22:44:03 1.1.2.3
@@ -170,7 +170,7 @@
* component to join the CPG, and a cluster component
* to handle the request.
*/
- r = do_request(tfr);
+ r = local_resume(tfr);
if (r) {
LOG_DBG("Returning failed request to kernel [%s]",
RQ_TYPE(tfr->request_type));
^ permalink raw reply [flat|nested] 2+ messages in thread
* [Cluster-devel] cluster/cmirror/src cluster.c functions.c func ...
@ 2008-01-14 22:52 jbrassow
0 siblings, 0 replies; 2+ messages in thread
From: jbrassow @ 2008-01-14 22:52 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: jbrassow at sourceware.org 2008-01-14 22:52:17
Modified files:
cmirror/src : cluster.c functions.c functions.h link_mon.c
link_mon.h local.c logging.h queues.c queues.h
rbtree.c
Log message:
- Several small bug fixes
-- More correct method of leaving CPG (on suspend)
-- close log file desc after finished using
-- fix problem with overlapping recoveries
-- clean-up postsuspend so remote requests do not get lost
-- missing 'break' statement causing seg fault
-- better error checking
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/cluster.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.7&r2=1.1.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/functions.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.6&r2=1.1.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/functions.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.2&r2=1.1.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/link_mon.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.2&r2=1.1.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/link_mon.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/local.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.5&r2=1.1.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/logging.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.2&r2=1.1.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/queues.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/queues.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror/src/rbtree.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.2&r2=1.1.2.3
--- cluster/cmirror/src/Attic/cluster.c 2007/11/19 18:00:20 1.1.2.7
+++ cluster/cmirror/src/Attic/cluster.c 2008/01/14 22:52:17 1.1.2.8
@@ -162,7 +162,7 @@
* Errors from previous functions are in the tfr struct.
*/
- LOG_DBG("Sending respose to %u on cluster: [%s/%llu]",
+ LOG_DBG("Sending response to %u on cluster: [%s/%llu]",
tfr->originator,
RQ_TYPE(tfr->request_type & ~DM_CLOG_RESPONSE),
tfr->seq);
@@ -704,6 +704,12 @@
if (!match->valid) {
LOG_DBG("Log not valid yet, storing request");
startup_tfr = queue_remove(free_queue);
+ if (!startup_tfr) {
+ LOG_ERROR("Supply of transfer structs exhausted");
+ r = -ENOMEM; /* FIXME: Better error #? */
+ goto out;
+ }
+
memcpy(startup_tfr, tfr, sizeof(*tfr) + tfr->data_size);
queue_add_tail(startup_tfr, match->startup_queue);
goto out;
@@ -724,7 +730,7 @@
struct cpg_address *left_list, int left_list_entries,
struct cpg_address *joined_list, int joined_list_entries)
{
- int i, j;
+ int i, j, fd;
int my_pid = getpid();
int found = 0;
struct clog_cpg *match, *tmp;
@@ -740,8 +746,8 @@
LOG_PRINT("* MEMBERS (%d):", member_list_entries);
for (i = 0; i < member_list_entries; i++)
- LOG_PRINT("* [%d] nodeid: %d, pid: %d",
- i, member_list[i].nodeid, member_list[i].pid);
+ LOG_PRINT("* nodeid: %d, pid: %d",
+ member_list[i].nodeid, member_list[i].pid);
LOG_PRINT("* LEAVING (%d):", left_list_entries);
for (i = 0; i < left_list_entries; i++)
@@ -768,6 +774,46 @@
goto out;
}
+ /* Am I leaving? */
+ for (i = 0; i < left_list_entries; i++)
+ if (my_cluster_id == left_list[i].nodeid) {
+ struct clog_tfr *tfr;
+
+ LOG_DBG("Finalizing leave...");
+ list_del_init(&match->list);
+
+ cpg_fd_get(match->handle, &fd);
+ links_unregister(fd);
+
+ cluster_postsuspend(match->name.value);
+
+ while (!queue_empty(cluster_queue)) {
+ tfr = queue_remove(cluster_queue);
+
+ /*
+ * A postsuspend is place directly into
+ * the cluster_queue, without going out
+ * to the cluster. This means that only
+ * our postsuspend will ever exist in the
+ * cluster_queue.
+ */
+ if (tfr->request_type == DM_CLOG_POSTSUSPEND)
+ kernel_send(tfr);
+ else
+ queue_add(tfr, free_queue);
+ }
+
+ cpg_finalize(match->handle);
+
+ if (match->startup_queue->count)
+ LOG_ERROR("Startup items remain in cluster log");
+
+ free(match->startup_queue);
+ free(match);
+
+ goto out;
+ }
+
/* Am I the very first to join? */
if (!left_list_entries &&
(member_list_entries == 1) && (joined_list_entries == 1) &&
@@ -887,7 +933,7 @@
/* FIXME: better variable */
cpg_fd_get(new->handle, &r);
- links_register(r, do_cluster_work, NULL);
+ links_register(r, "cluster", do_cluster_work, NULL);
EXIT();
return 0;
@@ -895,26 +941,15 @@
int destroy_cluster_cpg(char *str)
{
- int fd;
+ int r;
struct clog_cpg *del, *tmp;
ENTER();
list_for_each_entry_safe(del, tmp, &clog_cpg_list, list)
if (!strncmp(del->name.value, str, CPG_MAX_NAME_LENGTH)) {
- list_del_init(&del->list);
-
- cpg_fd_get(del->handle, &fd);
- links_unregister(fd);
-
- cpg_leave(del->handle, &del->name);
- cpg_finalize(del->handle);
-
- if (del->startup_queue->count)
- LOG_ERROR("Startup items remain in cluster log");
-
- free(del->startup_queue);
- free(del);
-
+ r = cpg_leave(del->handle, &del->name);
+ if (r != CPG_OK)
+ LOG_ERROR("Error leaving CPG!");
break;
}
--- cluster/cmirror/src/Attic/functions.c 2007/12/06 22:12:21 1.1.2.6
+++ cluster/cmirror/src/Attic/functions.c 2008/01/14 22:52:17 1.1.2.7
@@ -28,6 +28,12 @@
#define MIRROR_DISK_VERSION 2
#define LOG_OFFSET 2
+struct log_header {
+ uint32_t magic;
+ uint32_t version;
+ uint64_t nr_regions;
+};
+
/*
* Used by the 'touched' variable, these macros mean:
* LOG_CHANGED - bits in the in-memory log have changed
@@ -36,12 +42,6 @@
#define LOG_CHANGED 1
#define LOG_FLUSH 2
-struct log_header {
- uint32_t magic;
- uint32_t version;
- uint64_t nr_regions;
-};
-
struct log_c {
struct list_head list;
char uuid[DM_UUID_LEN];
@@ -68,6 +68,8 @@
uint32_t state; /* current operational state of the log */
struct rb_tree mark_tree; /* Tree that tracks all mark requests */
+
+ uint32_t recovery_halted;
struct recovery_request *recovery_request_list;
int disk_fd; /* -1 means no disk log */
@@ -326,7 +328,7 @@
}
-static int find_disk_path(char *major_minor_str, char *path_rtn)
+static int find_disk_path(char *major_minor_str, char *path_rtn, int *unlink_path)
{
int r;
DIR *dp;
@@ -372,6 +374,11 @@
sprintf(path_rtn, "/dev/mapper/%d-%d", major, minor);
r = mknod(path_rtn, S_IFBLK | S_IRUSR | S_IWUSR, MKDEV(major, minor));
+ /*
+ * If we have to make the path, we unlink it after we open it
+ */
+ *unlink_path = 1;
+
return r ? -errno : 0;
}
@@ -387,6 +394,7 @@
int disk_log = 0;
char disk_path[128];
+ int unlink_path = 0;
size_t page_size;
int pages;
@@ -403,7 +411,7 @@
goto fail;
}
- r = find_disk_path(argv[0], disk_path);
+ r = find_disk_path(argv[0], disk_path, &unlink_path);
if (r) {
LOG_ERROR("Unable to find path to device %s", argv[0]);
goto fail;
@@ -497,11 +505,14 @@
r = errno;
goto fail;
}
+ if (unlink_path)
+ unlink(disk_path);
+
lc->disk_fd = r;
lc->disk_size = pages * page_size;
r = posix_memalign(&(lc->disk_buffer), page_size,
- lc->disk_size);
+ lc->disk_size);
if (r) {
LOG_ERROR("Unable to allocate memory for disk_buffer");
goto fail;
@@ -511,6 +522,8 @@
list_add(&lc->list, &log_pending_list);
+ LOG_DBG("Log UUID = %s, mark_tree = 0x%p",
+ SHORT_UUID(lc->uuid), &lc->mark_tree);
EXIT();
return 0;
fail:
@@ -557,7 +570,7 @@
LOG_ERROR("Received constructor request with bad data");
LOG_DBG("strlen(tfr->data)[%d] != tfr->data_size[%d]",
strlen(tfr->data), tfr->data_size);
- LOG_DBG("tfr->data = %s", tfr->data);
+ LOG_DBG("tfr->data = '%s' [%d]", tfr->data, strlen(tfr->data));
return -EINVAL;
}
@@ -591,6 +604,7 @@
LOG_ERROR("Failed to create cluster log (%s)", tfr->uuid);
else
LOG_PRINT("Cluster log created (%s)", tfr->uuid);
+
return r;
}
@@ -603,14 +617,29 @@
{
struct log_c *lc = get_log(tfr->uuid);
- if (!lc)
- return -EINVAL;
-
- destroy_cluster_cpg(tfr->uuid);
+ if (!lc) {
+ /* Is the log in the pending list? */
+ lc = get_pending_log(tfr->uuid);
+ if (!lc) {
+ LOG_ERROR("clog_dtr called on log that is not official or pending");
+ return -EINVAL;
+ }
+ } else {
+ LOG_DBG("[%s] clog_dtr: leaving CPG", SHORT_UUID(lc->uuid));
+ /*
+ * If postsuspend had done the destroy_cluster_cpg,
+ * the log context would be in the pending list
+ */
+ destroy_cluster_cpg(tfr->uuid);
+ }
LOG_PRINT("Cluster log removed (%s)", lc->uuid);
list_del_init(&lc->list);
+ if (lc->disk_fd != -1)
+ close(lc->disk_fd);
+ if (lc->disk_buffer)
+ free(lc->disk_buffer);
free(lc->clean_bits);
free(lc->sync_bits);
free(lc);
@@ -634,6 +663,7 @@
LOG_DBG("WARNING: log still marked as 'touched' during suspend");
lc->state = LOG_SUSPENDED;
+ lc->recovery_halted = 1;
return 0;
}
@@ -650,7 +680,31 @@
if (!lc)
return -EINVAL;
+ LOG_DBG("[%s] clog_postsuspend: leaving CPG", SHORT_UUID(lc->uuid));
+ destroy_cluster_cpg(tfr->uuid);
+
+ return 0;
+}
+
+/*
+ * cluster_postsuspend
+ * @tfr
+ *
+ */
+int cluster_postsuspend(char *uuid)
+{
+ struct log_c *lc = get_log(uuid);
+
+ if (!lc)
+ return -EINVAL;
+
+ LOG_DBG("[%s] clog_postsuspend: finalizing", SHORT_UUID(lc->uuid));
lc->resume_override = 0;
+
+ /* move log to pending list */
+ list_del_init(&lc->list);
+ list_add(&lc->list, &log_pending_list);
+
return 0;
}
@@ -687,12 +741,13 @@
LOG_ERROR("Error:: partial bit loading (just clean_bits)");
return -EINVAL;
case 3:
- LOG_DBG("Non-master resume: bits pre-loaded");
+ LOG_DBG("[%s] Non-master resume: bits pre-loaded",
+ SHORT_UUID(lc->uuid));
lc->resume_override = 1000;
lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count);
- LOG_DBG("Initial sync_count = %llu", lc->sync_count);
+ LOG_DBG("[%s] Initial sync_count = %llu",
+ SHORT_UUID(lc->uuid), lc->sync_count);
goto out;
- return 0;
default:
LOG_ERROR("Error:: multiple loading of bits (%d)", lc->resume_override);
return -EINVAL;
@@ -708,12 +763,15 @@
switch (tfr->error) {
case 0:
if (lc->disk_nr_regions < lc->region_count)
- LOG_DBG("Mirror has grown, updating log bits");
+ LOG_DBG("[%s] Mirror has grown, updating log bits",
+ SHORT_UUID(lc->uuid));
else if (lc->disk_nr_regions > lc->region_count)
- LOG_DBG("Mirror has shrunk, updating log bits");
+ LOG_DBG("[%s] Mirror has shrunk, updating log bits",
+ SHORT_UUID(lc->uuid));
break;
case -EINVAL:
- LOG_DBG("Read log failed: not yet initialized");
+ LOG_DBG("[%s] Read log failed: not yet initialized",
+ SHORT_UUID(lc->uuid));
lc->disk_nr_regions = 0;
break;
default:
@@ -737,7 +795,8 @@
/* copy clean across to sync */
memcpy(lc->sync_bits, lc->clean_bits, size);
lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count);
- LOG_DBG("Initial sync_count = %llu", lc->sync_count);
+ LOG_DBG("[%s] Initial sync_count = %llu",
+ SHORT_UUID(lc->uuid), lc->sync_count);
lc->sync_search = 0;
/*
@@ -747,6 +806,7 @@
lc->touched = LOG_FLUSH;
out:
lc->state = LOG_RESUMED;
+ lc->recovery_halted = 0;
return tfr->error;
}
@@ -858,9 +918,11 @@
*rtn = log_test_bit(lc->sync_bits, region);
if (*rtn)
- LOG_DBG(" Region is in-sync: %llu", region);
+ LOG_DBG("[%s] Region is in-sync: %llu",
+ SHORT_UUID(lc->uuid), region);
else
- LOG_DBG(" Region is not in-sync: %llu", region);
+ LOG_DBG("[%s] Region is not in-sync: %llu",
+ SHORT_UUID(lc->uuid), region);
tfr->data_size = sizeof(*rtn);
@@ -879,14 +941,6 @@
if (!lc)
return -EINVAL;
- /*
- * Are we trying to flush when a mark request conflicts
- * with a recovering region?
- */
- if ((lc->recovering_region != -1) &&
- !log_test_bit(lc->clean_bits, lc->recovering_region))
- return -EAGAIN;
-
/*
* Actual disk flush happens in 'commit_log()'
* Clear LOG_CHANGED and set LOG_FLUSH
@@ -992,8 +1046,8 @@
srsm_count_var = 0;
mark_list = rbt_search_plus(&lc->mark_tree, ®ion, srsm_count, &who);
if (!mark_list || !srsm_count_var) {
- LOG_DBG("Clear issued on region that is not marked: %llu/%u",
- region, who);
+ LOG_DBG("[%s] Clear issued by %u on region not marked: %llu",
+ SHORT_UUID(lc->uuid), who, region);
goto set_bit;
}
@@ -1082,14 +1136,14 @@
* FIXME: handle intermittent errors during recovery
* by resetting sync_search... but not to many times.
*/
- LOG_DBG(" Recovery has finished");
+ LOG_DBG("[%s] Recovery has finished", SHORT_UUID(lc->uuid));
pkg->i = 0;
return 0;
}
if (lc->recovering_region != (uint64_t)-1) {
- LOG_DBG("Someone is already recovering region %Lu",
- lc->recovering_region);
+ LOG_DBG("[%s] Someone is already recovering region %Lu",
+ SHORT_UUID(lc->uuid), lc->recovering_region);
pkg->i = 0;
return 0;
}
@@ -1104,14 +1158,17 @@
free(del);
if (!log_test_bit(lc->sync_bits, pkg->r)) {
- LOG_DBG("Assigning priority resync work to %u: %llu",
- tfr->originator, pkg->r);
+ LOG_DBG("[%s] Assigning priority resync work to %u: %llu",
+ SHORT_UUID(lc->uuid), tfr->originator, pkg->r);
#ifdef DEBUG
- LOG_DBG("Priority work remaining:");
+ LOG_DBG("[%s] Priority work remaining:",
+ SHORT_UUID(lc->uuid));
for (del = lc->recovery_request_list; del; del = del->next)
- LOG_DBG(" %llu", del->region);
+ LOG_DBG("[%s] %llu", SHORT_UUID(lc->uuid),
+ del->region);
#endif
pkg->i = 1;
+ lc->recovering_region = pkg->r;
return 0;
}
}
@@ -1127,8 +1184,10 @@
lc->sync_search = pkg->r + 1;
- LOG_DBG(" Assigning resync work: region = %llu\n", pkg->r);
+ LOG_DBG("[%s] Assigning resync work to %u: region = %llu\n",
+ SHORT_UUID(lc->uuid), tfr->originator, pkg->r);
pkg->i = 1;
+ lc->recovering_region = pkg->r;
return 0;
}
@@ -1153,10 +1212,16 @@
} else {
log_set_bit(lc, lc->sync_bits, pkg->region);
lc->sync_count++;
+ LOG_DBG("[%s] sync_count = %llu, Region %llu marked in-sync by %u",
+ SHORT_UUID(lc->uuid), lc->sync_count,
+ pkg->region, tfr->originator);
}
} else if (log_test_bit(lc->sync_bits, pkg->region)) {
lc->sync_count--;
log_clear_bit(lc, lc->sync_bits, pkg->region);
+ LOG_DBG("[%s] sync_count = %llu, Region %llu marked not in-sync by %u",
+ SHORT_UUID(lc->uuid), lc->sync_count,
+ pkg->region, tfr->originator);
}
tfr->data_size = 0;
@@ -1301,21 +1366,41 @@
if (region > lc->region_count)
return -EINVAL;
- *rtn = !log_test_bit(lc->sync_bits, region);
- if (*rtn) {
+ if (lc->recovery_halted) {
+ LOG_DBG("[%s] Recovery halted... [not remote recovering]: %llu",
+ SHORT_UUID(lc->uuid), region);
+ *rtn = 0;
+ } else {
+ *rtn = !log_test_bit(lc->sync_bits, region);
+ LOG_DBG("[%s] Region is %s: %llu",
+ SHORT_UUID(lc->uuid),
+ (region == lc->recovering_region) ?
+ "currently remote recovering" :
+ (*rtn) ? "pending remote recovery" :
+ "not remote recovering", region);
+ }
+
+ if (*rtn && (region != lc->recovering_region)) {
struct recovery_request *rr;
- LOG_DBG(" Region is busy recovering: %llu", region);
+
+ /* Already in the list? */
+ for (rr = lc->recovery_request_list; rr; rr = rr->next)
+ if (rr->region == region)
+ goto out;
/* Failure to allocated simply means we can't prioritize it */
rr = malloc(sizeof(*rr));
- if (rr) {
- rr->region = region;
- rr->next = lc->recovery_request_list;
- lc->recovery_request_list = rr;
- }
- } else
- LOG_DBG(" Region is not recovering: %llu", region);
+ if (!rr)
+ goto out;
+ LOG_DBG("[%s] Adding region to priority list: %llu",
+ SHORT_UUID(lc->uuid), region);
+ rr->region = region;
+ rr->next = lc->recovery_request_list;
+ lc->recovery_request_list = rr;
+ }
+
+out:
tfr->data_size = sizeof(*rtn);
return 0;
@@ -1336,7 +1421,6 @@
{
int r;
- /* FIXME: lock */
/* FIXME: ENTER discards null check */
ENTER("%s", RQ_TYPE(tfr->request_type));
@@ -1415,7 +1499,6 @@
tfr->data_size = 0;
}
- /* FIXME: unlock */
EXIT();
return 0;
}
@@ -1434,7 +1517,7 @@
struct log_c *lc;
ENTER();
- /* FIXME: lock */
+
lc = get_log(tfr->uuid);
if (!lc) {
@@ -1453,7 +1536,7 @@
LOG_ERROR("Error writing to disk log");
return -EIO;
}
- LOG_DBG("Disk log written");
+ LOG_DBG("[%s] Disk log written", SHORT_UUID(lc->uuid));
}
if (lc->touched & LOG_CHANGED)
@@ -1461,7 +1544,6 @@
lc->touched &= ~LOG_FLUSH;
- /* FIXME: unlock */
out:
EXIT();
return 0;
@@ -1511,11 +1593,11 @@
if (!strncmp(which, "sync_bits", 9)) {
memcpy(*buf, lc->sync_bits, bitset_size);
- LOG_DBG("storing sync_bits:");
+ LOG_DBG("[%s] storing sync_bits:", SHORT_UUID(lc->uuid));
print_bits(*buf, bitset_size);
} else if (!strncmp(which, "clean_bits", 9)) {
memcpy(*buf, lc->clean_bits, bitset_size);
- LOG_DBG("storing clean_bits:");
+ LOG_DBG("[%s] storing clean_bits:", SHORT_UUID(lc->uuid));
print_bits(*buf, bitset_size);
}
@@ -1545,12 +1627,12 @@
if (!strncmp(which, "sync_bits", 9)) {
lc->resume_override += 1;
memcpy(lc->sync_bits, buf, bitset_size);
- LOG_DBG("loading sync_bits:");
+ LOG_DBG("[%s] loading sync_bits:", SHORT_UUID(lc->uuid));
print_bits((char *)lc->sync_bits, bitset_size);
} else if (!strncmp(which, "clean_bits", 9)) {
lc->resume_override += 2;
memcpy(lc->clean_bits, buf, bitset_size);
- LOG_DBG("loading clean_bits:");
+ LOG_DBG("[%s] loading clean_bits:", SHORT_UUID(lc->uuid));
print_bits((char *)lc->clean_bits, bitset_size);
}
--- cluster/cmirror/src/Attic/functions.h 2007/11/05 22:44:03 1.1.2.2
+++ cluster/cmirror/src/Attic/functions.h 2008/01/14 22:52:17 1.1.2.3
@@ -7,6 +7,7 @@
#define LOG_SUSPENDED 2
int local_resume(struct clog_tfr *tfr);
+int cluster_postsuspend(char *);
int do_request(struct clog_tfr *tfr);
int commit_log(struct clog_tfr *tfr);
int store_bits(const char *uuid, const char *which, char **buf);
--- cluster/cmirror/src/Attic/link_mon.c 2007/11/05 22:44:03 1.1.2.2
+++ cluster/cmirror/src/Attic/link_mon.c 2008/01/14 22:52:17 1.1.2.3
@@ -6,7 +6,7 @@
struct link_callback {
int fd;
-
+ char *name;
void *data;
int (*callback)(void *data);
@@ -18,7 +18,7 @@
static struct pollfd *pfds = NULL;
static struct link_callback *callbacks = NULL;
-int links_register(int fd, int (*callback)(void *data), void *data)
+int links_register(int fd, char *name, int (*callback)(void *data), void *data)
{
int i;
struct link_callback *lc;
@@ -40,6 +40,7 @@
}
lc->fd = fd;
+ lc->name = name;
lc->data = data;
lc->callback = callback;
@@ -64,6 +65,9 @@
lc->next = callbacks;
callbacks = lc;
+ LOG_DBG("Adding %s/%d", lc->name, lc->fd);
+ LOG_DBG(" used_pfds = %d, free_pfds = %d",
+ used_pfds, free_pfds);
EXIT();
return 0;
@@ -72,14 +76,27 @@
int links_unregister(int fd)
{
int i;
+ struct link_callback *p, *c;
- for (i = 0; i < used_pfds; i++) {
+ for (i = 0; i < used_pfds; i++)
if (fd == pfds[i].fd) {
- pfds[i].fd = pfds[used_pfds - 1].fd;
+ /* entire struct is copied (overwritten) */
+ pfds[i] = pfds[used_pfds - 1];
used_pfds--;
free_pfds++;
}
- }
+
+ for (p = NULL, c = callbacks; c; p = c, c = c->next)
+ if (fd == c->fd) {
+ LOG_DBG("Freeing up %s/%d", c->name, c->fd);
+ LOG_DBG(" used_pfds = %d, free_pfds = %d",
+ used_pfds, free_pfds);
+ if (p)
+ p->next = c->next;
+ else
+ callbacks = c->next;
+ free(c);
+ }
return 0;
}
@@ -102,6 +119,8 @@
/* FIXME: handle POLLHUP */
for (i = 0; i < used_pfds; i++)
if (pfds[i].revents & POLLIN) {
+ LOG_DBG("Data ready on %d", pfds[i].fd);
+
/* FIXME: Add this back return 1;*/
r++;
}
@@ -118,10 +137,13 @@
if (pfds[i].revents & POLLIN)
for (lc = callbacks; lc; lc = lc->next)
if (pfds[i].fd == lc->fd) {
+ LOG_DBG("Issuing callback on %s/%d",
+ lc->name, lc->fd);
r = lc->callback(lc->data);
if (r)
- LOG_ERROR("Bad callback on file desc, %d",
- lc->fd);
+ LOG_ERROR("Bad callback on %s/%d",
+ lc->name, lc->fd);
+ break;
}
return 0;
}
--- cluster/cmirror/src/Attic/link_mon.h 2007/08/23 19:57:31 1.1.2.1
+++ cluster/cmirror/src/Attic/link_mon.h 2008/01/14 22:52:17 1.1.2.2
@@ -1,7 +1,7 @@
#ifndef __LINK_MON_DOT_H__
#define __LINK_MON_DOT_H__
-int links_register(int fd, int (*callback)(void *data), void *data);
+int links_register(int fd, char *name, int (*callback)(void *data), void *data);
int links_unregister(int fd);
int links_monitor(void);
int links_issue_callbacks(void);
--- cluster/cmirror/src/Attic/local.c 2007/11/09 05:47:05 1.1.2.5
+++ cluster/cmirror/src/Attic/local.c 2008/01/14 22:52:17 1.1.2.6
@@ -84,7 +84,7 @@
return -ENOMEM;
}
- memset(*tfr, 0, sizeof(struct clog_tfr));
+ memset(*tfr, 0, DM_CLOG_TFR_SIZE);
r = kernel_recv_helper(*tfr, DM_CLOG_TFR_SIZE);
if (r) {
@@ -158,6 +158,7 @@
case DM_CLOG_DTR:
case DM_CLOG_STATUS_INFO:
case DM_CLOG_STATUS_TABLE:
+ case DM_CLOG_PRESUSPEND:
r = do_request(tfr);
if (r)
LOG_DBG("Returning failed request to kernel [%s]",
@@ -168,6 +169,19 @@
RQ_TYPE(tfr->request_type));
break;
+ case DM_CLOG_POSTSUSPEND:
+ r = do_request(tfr);
+ if (r) {
+ LOG_DBG("Returning failed request to kernel [%s]",
+ RQ_TYPE(tfr->request_type));
+ r = kernel_send(tfr);
+ if (r)
+ LOG_ERROR("Failed to respond to kernel [%s]",
+ RQ_TYPE(tfr->request_type));
+ }
+ queue_add_tail(tfr, cluster_queue);
+
+ break;
case DM_CLOG_RESUME:
/*
* Resume is a special case that requires a local
@@ -189,6 +203,9 @@
/* Add before send_to_cluster, so cluster code can find it */
queue_add_tail(tfr, cluster_queue);
r = cluster_send(tfr);
+ if (r)
+ LOG_ERROR("Unable to send request to cluster: %s",
+ strerror(-r));
break;
}
@@ -298,7 +315,7 @@
r = fcntl(cn_fd, F_SETFL, FNDELAY);
*/
- links_register(cn_fd, do_local_work, NULL);
+ links_register(cn_fd, "local", do_local_work, NULL);
EXIT();
return 0;
--- cluster/cmirror/src/Attic/logging.h 2007/11/03 18:53:03 1.1.2.2
+++ cluster/cmirror/src/Attic/logging.h 2008/01/14 22:52:17 1.1.2.3
@@ -4,6 +4,9 @@
#include <stdio.h>
#include <syslog.h>
+/* SHORT_UUID - print last 8 chars of a string */
+#define SHORT_UUID(x) ((x) + (strlen(x) - 8))
+
extern int log_tabbing;
extern int log_is_open;
--- cluster/cmirror/src/Attic/queues.c 2007/08/23 19:57:31 1.1.2.1
+++ cluster/cmirror/src/Attic/queues.c 2008/01/14 22:52:17 1.1.2.2
@@ -81,7 +81,18 @@
int queue_status(void)
{
+ int i=1;
+ struct clog_tfr *tfr;
+ struct list_head *p, *n;
+
LOG_PRINT("cluster_queue: %d", cluster_queue->count);
+ list_for_each_safe(p, n, &cluster_queue->list) {
+ tfr = (struct clog_tfr *)p;
+ LOG_PRINT(" %d) %s, originator = %u",
+ i++, RQ_TYPE(tfr->request_type),
+ tfr->originator);
+ }
+
LOG_PRINT("free_queue : %d", free_queue->count);
return cluster_queue->count;
--- cluster/cmirror/src/Attic/queues.h 2007/08/23 19:57:31 1.1.2.1
+++ cluster/cmirror/src/Attic/queues.h 2008/01/14 22:52:17 1.1.2.2
@@ -24,6 +24,6 @@
int (*f)(struct clog_tfr *, struct clog_tfr *),
struct clog_tfr *tfr_cmp);
void queue_remove_all(struct list_head *l, struct queue *q);
-int queue_emtpy(struct queue *q);
+int queue_empty(struct queue *q);
#endif /* __CLUSTER_LOG_QUEUES_DOT_H__ */
--- cluster/cmirror/src/Attic/rbtree.c 2007/11/09 05:47:05 1.1.2.2
+++ cluster/cmirror/src/Attic/rbtree.c 2008/01/14 22:52:17 1.1.2.3
@@ -51,6 +51,13 @@
{
struct rb_node *tmp;
+ if (RIGHT(axis) == NIL(t)) {
+ LOG_ERROR("Tree error: unable to left rotate");
+ return axis;
+ }
+
+ LOG_DBG("TREE[0x%p]: left_rotate [axis = %llu]",
+ t, *((uint64_t *)KEY(axis)));
tmp = RIGHT(axis);
RIGHT(axis) = LEFT(tmp);
if (LEFT(tmp) != NIL(t))
@@ -74,6 +81,13 @@
{
struct rb_node *tmp;
+ if (LEFT(axis) == NIL(t)) {
+ LOG_ERROR("Tree error: unable to right rotate");
+ return axis;
+ }
+
+ LOG_DBG("TREE[0x%p]: right_rotate [axis = %llu]",
+ t, *((uint64_t *)KEY(axis)));
tmp = LEFT(axis);
LEFT(axis) = RIGHT(tmp);
if (RIGHT(tmp) != NIL(t))
@@ -128,9 +142,13 @@
t->in_use_nodes++;
if (t->in_use_nodes > t->max_nodes) {
t->max_nodes = t->in_use_nodes;
- LOG_DBG("Maximum tree nodes now at %d", t->max_nodes);
+ LOG_PRINT("TREE[0x%p]: Maximum tree nodes now at %d",
+ t, t->max_nodes);
}
+ LOG_DBG("TREE[0x%p]: allocating node (in_use_nodes = %d, max_nodes = %d)",
+ t, t->in_use_nodes, t->max_nodes);
+
if (t->free_list) {
new = t->free_list;
t->free_list = new->rb_next;
@@ -150,6 +168,9 @@
d->rb_next = t->free_list;
t->free_list = d;
+ LOG_DBG("TREE[0x%p]: freeing node (in_use_nodes = %d, max_nodes = %d)",
+ t, t->in_use_nodes, t->max_nodes);
+
/* FIXME: We never free any nodes */
}
@@ -157,7 +178,8 @@
{
struct rb_node *tmp;
- LOG_DBG("TREE: insert %llu", *((uint64_t *)KEY(new)));
+ LOG_DBG("TREE[0x%p]: inserting %llu",
+ t, *((uint64_t *)KEY(new)));
if (!t || !new) {
errno = EINVAL;
return -1;
@@ -221,6 +243,7 @@
left_rotate(t, PARENT(x));
w = RIGHT(PARENT(x));
}
+
if ((COLOR(LEFT(w)) == BLACK) && (COLOR(RIGHT(w)) == BLACK)) {
COLOR(w) = RED;
x = PARENT(x);
@@ -296,7 +319,8 @@
{
struct rb_node *x, *y;
- LOG_DBG("TREE: remove %llu", *((uint64_t *)KEY(del)));
+ LOG_DBG("TREE[0x%p]: removing %llu",
+ t, *((uint64_t *)KEY(del)));
if ((LEFT(del) == NIL(t)) || (RIGHT(del) == NIL(t)))
y = del;
else
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2008-01-14 22:52 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-05 22:44 [Cluster-devel] cluster/cmirror/src cluster.c functions.c func jbrassow
-- strict thread matches above, loose matches on Subject: below --
2008-01-14 22:52 jbrassow
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).