From mboxrd@z Thu Jan 1 00:00:00 1970 From: jbrassow@sourceware.org Date: 2 Feb 2007 17:22:56 -0000 Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ... Message-ID: <20070202172256.30759.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL4 Changes by: jbrassow at sourceware.org 2007-02-02 17:22:55 Modified files: cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c Log message: - Fix for bug #225337 Reset 'sync_search' if (lc->sync_search >= lc->region_count) && (lc->sync_count < lc->region_count). It indicates that a failure during recovery has taken place, and we are likely able to handle it. Also, do not issue clear/mark region requests if it is already known that the log device has failed. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.34&r2=1.1.2.35 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.19&r2=1.1.2.20 --- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/01/08 19:28:26 1.1.2.34 +++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/02/02 17:22:55 1.1.2.35 @@ -568,7 +568,7 @@ ** while we are here. 
If the clear region request fails, it** ** would be re-added - perhaps prematurely clearing the bit */ - if(rs){ + if(rs && !rs->rs_lc->log_dev_failed){ _consult_server(rs->rs_lc, rs->rs_region, LRT_CLEAR_REGION, NULL, &retry); @@ -951,33 +951,35 @@ spin_unlock(&region_state_lock); - while((error = consult_server(lc, region, LRT_MARK_REGION, NULL))){ - if (error == -EBUSY) { - /* Remote recovering delay and try again */ - DMDEBUG("Delaying mark to region %Lu, due to recovery", - region); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ/2); - continue; - } + if (!lc->log_dev_failed) { + while((error = consult_server(lc, region, LRT_MARK_REGION, NULL))){ + if (error == -EBUSY) { + /* Remote recovering delay and try again */ + DMDEBUG("Delaying mark to region %Lu, due to recovery", + region); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ/2); + continue; + } - if (error == -EIO) { - lc->log_dev_failed = 1; - break; + if (error == -EIO) { + lc->log_dev_failed = 1; + break; + } + DMWARN("unable to get server (%u) to mark region (%Lu)", + lc->server_id, region); + DMWARN("Reason :: %d", error); } - DMWARN("unable to get server (%u) to mark region (%Lu)", - lc->server_id, region); - DMWARN("Reason :: %d", error); - } - if (lc->log_dev_failed) { - dm_table_event(lc->ti->table); - /* - DMERR("Write failed on mirror log device, %s", - lc->log_dev->name); - if (!atomic_read(&lc->suspended)) - wait_for_completion(&lc->failure_completion); - */ + if (lc->log_dev_failed) { + dm_table_event(lc->ti->table); + /* + DMERR("Write failed on mirror log device, %s", + lc->log_dev->name); + if (!atomic_read(&lc->suspended)) + wait_for_completion(&lc->failure_completion); + */ + } } return; } --- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/01/08 19:28:26 1.1.2.19 +++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/02/02 17:22:55 1.1.2.20 @@ -224,8 +224,16 @@ static int _core_get_resync_work(struct log_c *lc, region_t *region) { - if
(lc->sync_search >= lc->region_count){ - return 0; + if (lc->sync_search >= lc->region_count) { + /* + * FIXME: pvmove is not supported yet, but when it is, + * an audit of sync_count changes will need to be made + */ + if (lc->sync_count < lc->region_count) { + lc->sync_search = 0; + } else { + return 0; + } } do { *region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits, @@ -557,9 +565,12 @@ if(!find_ru_by_region(lc, lr->u.lr_region)){ log_set_bit(lc, lc->clean_bits, lr->u.lr_region); + write_bits(lc); + /* if (write_bits(lc)) DMERR("Write bits failed on mirror log device, %s", lc->log_dev->name); + */ } return 0; }