From mboxrd@z Thu Jan 1 00:00:00 1970 From: jbrassow@sourceware.org Date: 25 Jan 2008 16:23:25 -0000 Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-clog.c Message-ID: <20080125162325.23412.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL5 Changes by: jbrassow at sourceware.org 2008-01-25 16:23:25 Modified files: cmirror-kernel/src: dm-clog.c Log message: - calling dm_get_device fixes rename bug 205641 - caching extra state in the kernel helps reduce cluster traffic 90%, this improves performance Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-clog.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2.2.7&r2=1.2.2.8 --- cluster/cmirror-kernel/src/dm-clog.c 2008/01/23 21:22:28 1.2.2.7 +++ cluster/cmirror-kernel/src/dm-clog.c 2008/01/25 16:23:24 1.2.2.8 @@ -23,8 +23,12 @@ char *ctr_str; /* Gives ability to restart if userspace dies */ uint32_t ctr_size; + uint32_t in_sync_hint; + spinlock_t flush_lock; struct list_head flush_list; /* only for clear and mark requests */ + + struct dm_dev *disk_log; }; static mempool_t *flush_entry_pool = NULL; @@ -78,21 +82,21 @@ } static int cluster_ctr(struct dirty_log *log, struct dm_target *ti, - unsigned int argc, char **argv, int disk_log) + unsigned int argc, char **argv, + struct dm_dev *disk_log) { int i; int r = 0; int str_size; + int offset = (disk_log) ? 
1 : 0; char *ctr_str = NULL; struct log_c *lc = NULL; uint32_t region_size; region_t region_count; /* Already checked argument count */ - if (disk_log != 0 && disk_log != 1) - return -EINVAL; - if (sscanf(argv[disk_log], "%u", &region_size) != 1) { + if (sscanf(argv[offset], "%u", &region_size) != 1) { DMWARN("Invalid region size string"); return -EINVAL; } @@ -108,9 +112,10 @@ lc->ti = ti; lc->region_size = region_size; lc->region_count = region_count; + lc->disk_log = disk_log; /* FIXME: Need to check size of uuid arg */ - memcpy(lc->uuid, argv[1 + disk_log], DM_UUID_LEN); + memcpy(lc->uuid, argv[1 + offset], DM_UUID_LEN); spin_lock_init(&lc->flush_lock); INIT_LIST_HEAD(&lc->flush_list); @@ -174,7 +179,7 @@ return -EINVAL; } - r = cluster_ctr(log, ti, argc, argv, 0); + r = cluster_ctr(log, ti, argc, argv, NULL); return r; } @@ -195,7 +200,9 @@ static int cluster_disk_ctr(struct dirty_log *log, struct dm_target *ti, unsigned int argc, char **argv) { - int i; + int r, i; + struct dm_dev *dev; + if ((argc < 4) || (argc > 5)) { DMERR("Too %s arguments to clustered-disk mirror log type.", (argc < 3) ? "few" : "many"); @@ -205,7 +212,15 @@ return -EINVAL; } - return cluster_ctr(log, ti, argc, argv, 1); + r = dm_get_device(ti, argv[0], 0, 0, FMODE_READ | FMODE_WRITE, &dev); + if (r) + return r; + + r = cluster_ctr(log, ti, argc, argv, dev); + if (r) + dm_put_device(ti, dev); + + return r; } /* @@ -222,6 +237,8 @@ NULL, NULL); /* FIXME: What do we do on failure? */ + if (lc->disk_log) + dm_put_device(lc->ti, lc->disk_log); kfree(lc->ctr_str); kfree(lc); @@ -269,6 +286,7 @@ int r; struct log_c *lc = (struct log_c *)log->context; + lc->in_sync_hint = 0; r = dm_clog_consult_server(lc->uuid, DM_CLOG_RESUME, NULL, 0, NULL, NULL); @@ -335,6 +353,19 @@ int rdata_size; struct log_c *lc = (struct log_c *)log->context; + /* + * We can never respond directly - even if in_sync_hint is + * set. This is because another machine could see a device + * failure and mark the region out-of-sync. 
If we don't go + * to userspace to ask, we might think the region is in-sync + * and allow a read to pick up data that is stale. (This is + * very unlikely if a device actually fails; but it is very + * likely if a connection to one device from one machine fails.) + * + * There still might be a problem if the mirror caches the region + * state as in-sync... but then this call would not be made. So, + * that is a mirror problem. + */ if (!can_block) return -EWOULDBLOCK; @@ -559,7 +590,19 @@ NULL, 0, (char *)&sync_count, &rdata_size); - return (r) ? 0 : sync_count; + if (r) + return 0; + + if (sync_count == lc->region_count) + lc->in_sync_hint = 1; + /* + * get_sync_count is never called after the + * initial sync=1 + else + lc->in_sync_hint = 0; + */ + + return sync_count; } /* @@ -610,6 +653,16 @@ int rdata_size; struct log_c *lc = (struct log_c *)log->context; + /* + * Once the mirror has been reported to be in-sync, + * it will never again ask for recovery work. So, + * we can safely say there is not a remote machine + * recovering if the device is in-sync. (in_sync_hint + * must be reset at resume time.) + */ + if (lc->in_sync_hint) + return 0; + rdata_size = sizeof(is_recovering); r = cluster_do_request(lc, lc->uuid, DM_CLOG_IS_REMOTE_RECOVERING, (char *)&region, sizeof(region),