From: jbrassow@sourceware.org <jbrassow@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cmirror-kernel/src dm-clog.c
Date: 25 Jan 2008 16:23:25 -0000 [thread overview]
Message-ID: <20080125162325.23412.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: jbrassow at sourceware.org 2008-01-25 16:23:25
Modified files:
cmirror-kernel/src: dm-clog.c
Log message:
- calling dm_get_device fixes rename bug 205641
- caching extra state in the kernel helps reduce cluster traffic by 90%,
which improves performance
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-clog.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2.2.7&r2=1.2.2.8
--- cluster/cmirror-kernel/src/dm-clog.c 2008/01/23 21:22:28 1.2.2.7
+++ cluster/cmirror-kernel/src/dm-clog.c 2008/01/25 16:23:24 1.2.2.8
@@ -23,8 +23,12 @@
char *ctr_str; /* Gives ability to restart if userspace dies */
uint32_t ctr_size;
+ uint32_t in_sync_hint;
+
spinlock_t flush_lock;
struct list_head flush_list; /* only for clear and mark requests */
+
+ struct dm_dev *disk_log;
};
static mempool_t *flush_entry_pool = NULL;
@@ -78,21 +82,21 @@
}
static int cluster_ctr(struct dirty_log *log, struct dm_target *ti,
- unsigned int argc, char **argv, int disk_log)
+ unsigned int argc, char **argv,
+ struct dm_dev *disk_log)
{
int i;
int r = 0;
int str_size;
+ int offset = (disk_log) ? 1 : 0;
char *ctr_str = NULL;
struct log_c *lc = NULL;
uint32_t region_size;
region_t region_count;
/* Already checked argument count */
- if (disk_log != 0 && disk_log != 1)
- return -EINVAL;
- if (sscanf(argv[disk_log], "%u", &region_size) != 1) {
+ if (sscanf(argv[offset], "%u", &region_size) != 1) {
DMWARN("Invalid region size string");
return -EINVAL;
}
@@ -108,9 +112,10 @@
lc->ti = ti;
lc->region_size = region_size;
lc->region_count = region_count;
+ lc->disk_log = disk_log;
/* FIXME: Need to check size of uuid arg */
- memcpy(lc->uuid, argv[1 + disk_log], DM_UUID_LEN);
+ memcpy(lc->uuid, argv[1 + offset], DM_UUID_LEN);
spin_lock_init(&lc->flush_lock);
INIT_LIST_HEAD(&lc->flush_list);
@@ -174,7 +179,7 @@
return -EINVAL;
}
- r = cluster_ctr(log, ti, argc, argv, 0);
+ r = cluster_ctr(log, ti, argc, argv, NULL);
return r;
}
@@ -195,7 +200,9 @@
static int cluster_disk_ctr(struct dirty_log *log, struct dm_target *ti,
unsigned int argc, char **argv)
{
- int i;
+ int r, i;
+ struct dm_dev *dev;
+
if ((argc < 4) || (argc > 5)) {
DMERR("Too %s arguments to clustered-disk mirror log type.",
(argc < 3) ? "few" : "many");
@@ -205,7 +212,15 @@
return -EINVAL;
}
- return cluster_ctr(log, ti, argc, argv, 1);
+ r = dm_get_device(ti, argv[0], 0, 0, FMODE_READ | FMODE_WRITE, &dev);
+ if (r)
+ return r;
+
+ r = cluster_ctr(log, ti, argc, argv, dev);
+ if (r)
+ dm_put_device(ti, dev);
+
+ return r;
}
/*
@@ -222,6 +237,8 @@
NULL, NULL);
/* FIXME: What do we do on failure? */
+ if (lc->disk_log)
+ dm_put_device(lc->ti, lc->disk_log);
kfree(lc->ctr_str);
kfree(lc);
@@ -269,6 +286,7 @@
int r;
struct log_c *lc = (struct log_c *)log->context;
+ lc->in_sync_hint = 0;
r = dm_clog_consult_server(lc->uuid, DM_CLOG_RESUME,
NULL, 0,
NULL, NULL);
@@ -335,6 +353,19 @@
int rdata_size;
struct log_c *lc = (struct log_c *)log->context;
+ /*
+ * We can never respond directly - even if in_sync_hint is
+ * set. This is because another machine could see a device
+ * failure and mark the region out-of-sync. If we don't go
+ * to userspace to ask, we might think the region is in-sync
+ * and allow a read to pick up data that is stale. (This is
+ * very unlikely if a device actually fails; but it is very
+ * likely if a connection to one device from one machine fails.)
+ *
+ * There still might be a problem if the mirror caches the region
+ * state as in-sync... but then this call would not be made. So,
+ * that is a mirror problem.
+ */
if (!can_block)
return -EWOULDBLOCK;
@@ -559,7 +590,19 @@
NULL, 0,
(char *)&sync_count, &rdata_size);
- return (r) ? 0 : sync_count;
+ if (r)
+ return 0;
+
+ if (sync_count == lc->region_count)
+ lc->in_sync_hint = 1;
+ /*
+ * get_sync_count is never called after the
+ * initial sync=1
+ else
+ lc->in_sync_hint = 0;
+ */
+
+ return sync_count;
}
/*
@@ -610,6 +653,16 @@
int rdata_size;
struct log_c *lc = (struct log_c *)log->context;
+ /*
+ * Once the mirror has been reported to be in-sync,
+ * it will never again ask for recovery work. So,
+ * we can safely say there is not a remote machine
+ * recovering if the device is in-sync. (in_sync_hint
+ * must be reset at resume time.)
+ */
+ if (lc->in_sync_hint)
+ return 0;
+
rdata_size = sizeof(is_recovering);
r = cluster_do_request(lc, lc->uuid, DM_CLOG_IS_REMOTE_RECOVERING,
(char *)&region, sizeof(region),
next reply other threads:[~2008-01-25 16:23 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-25 16:23 jbrassow [this message]
-- strict thread matches above, loose matches on Subject: below --
2008-02-13 15:06 [Cluster-devel] cluster/cmirror-kernel/src dm-clog.c jbrassow
2008-01-23 21:22 jbrassow
2008-01-21 20:37 jbrassow
2007-08-30 18:26 jbrassow
2006-06-26 20:10 jbrassow
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080125162325.23412.qmail@sourceware.org \
--to=jbrassow@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).