From: David Teigland <teigland@redhat.com>
To: lvm-devel@redhat.com
Subject: [PATCH] clvmd: detect 3.7 dlm kernel bug
Date: Mon, 1 Apr 2013 17:13:39 -0400 [thread overview]
Message-ID: <20130401211339.GB11525@redhat.com> (raw)
Kernel commit 2b75bc9121e54e22537207b47b71373bcb0be41c
included a bug that causes dlm lock requests on max len
resources to return an EINVAL error, on CONFIG_COMPAT systems.
This bug was included in 3.7 and fixed in 3.8 by commit
d4b0bcf32b946590afd29e202d6a399b84fe6c67.
This clvmd patch acquires a new dlm lock on a per-node max len
resource during startup. If this fails with EINVAL, and the
system may be a CONFIG_COMPAT system, then we try taking a dlm
lock on a non-max len resource. If the second lock is successful,
then we log a message stating that the kernel may include the
dlm bug above. The other reasons for the dlm to return EINVAL
do not appear likely to apply to this case.
(I don't know if the 3.7 kernel is still used widely enough
to include this patch.)
---
daemons/clvmd/clvmd-corosync.c | 77 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 77 insertions(+)
diff --git a/daemons/clvmd/clvmd-corosync.c b/daemons/clvmd/clvmd-corosync.c
index d85ec1e..6fbc652 100644
--- a/daemons/clvmd/clvmd-corosync.c
+++ b/daemons/clvmd/clvmd-corosync.c
@@ -42,6 +42,7 @@
#include <libdlm.h>
#include <syslog.h>
+#include <sys/utsname.h>
/* Timeout value for several corosync calls */
#define LOCKSPACE_NAME "clvmd"
@@ -75,6 +76,9 @@ static quorum_handle_t quorum_handle;
/* DLM Handle */
static dlm_lshandle_t *lockspace;
+static char node_resource_str[DLM_RESNAME_MAXLEN + 1];
+static uint32_t node_resource_lkid;
+
static struct cpg_name cpg_group_name;
/* Corosync callback structs */
@@ -278,6 +282,72 @@ static void corosync_cpg_confchg_callback(cpg_handle_t handle,
num_nodes = member_list_entries;
}
+static int _lock_node_resource(void)
+{
+ struct dlm_lksb lksb;
+ int i, rv, error;
+ /* Take an EX dlm lock on this node's max-len resource; return 0 or the saved errno. */
+ memset(node_resource_str, 0, DLM_RESNAME_MAXLEN);
+ sprintf(node_resource_str, "node%08x", our_nodeid);
+
+ /* pad the name with '.' so it is exactly DLM_RESNAME_MAXLEN characters long */
+ for (i = strlen(node_resource_str); i < DLM_RESNAME_MAXLEN; i++)
+ node_resource_str[i] = '.';
+
+ DEBUGLOG("lock_node_resource len %zu %s\n",
+ strlen(node_resource_str), node_resource_str);
+
+ memset(&lksb, 0, sizeof(lksb));
+
+ rv = dlm_ls_lock_wait(lockspace, LKM_EXMODE, &lksb, 0,
+ node_resource_str, DLM_RESNAME_MAXLEN,
+ 0, NULL, NULL, NULL);
+ if (!rv) {
+ node_resource_lkid = lksb.sb_lkid;
+ return 0;
+ }
+
+ error = errno;
+
+ /*
+ * Check if this may be the 3.7 dlm kernel bug so we can log an
+ * informative error message. The probe only runs when long and long
+ * long differ in size. Remove this once 3.7 kernels are not in use.
+ */
+
+ if ((error == EINVAL) && (sizeof(long) != sizeof(long long))) {
+ struct utsname un;
+
+ /*
+ * Retry with a short name ("test" + nodeid, NL mode), which should
+ * pass the buggy size check. If the max len request failed with
+ * EINVAL and the short one succeeds, the kernel size check is the
+ * probable cause. NOTE(review): this test lock is not unlocked here.
+ */
+
+ memset(node_resource_str, 0, DLM_RESNAME_MAXLEN);
+ sprintf(node_resource_str, "test%08x", our_nodeid);
+
+ memset(&lksb, 0, sizeof(lksb));
+
+ rv = dlm_ls_lock_wait(lockspace, LKM_NLMODE, &lksb, 0,
+ node_resource_str, strlen(node_resource_str),
+ 0, NULL, NULL, NULL);
+ if (!rv) {
+ memset(&un, 0, sizeof(un));
+ uname(&un);
+
+ DEBUGLOG("dlm 3.7 CONFIG_COMPAT bug may exist in kernel %s\n", un.release);
+ syslog(LOG_ERR, "dlm 3.7 CONFIG_COMPAT bug may exist in kernel %s\n", un.release);
+ }
+ }
+
+ DEBUGLOG("dlm lock error %d on node %d resource\n", error, our_nodeid);
+ syslog(LOG_ERR, "dlm lock error %d on node %d resource\n", error, our_nodeid);
+
+ return error;
+}
+
static int _init_cluster(void)
{
cs_error_t err;
@@ -358,6 +428,13 @@ static int _init_cluster(void)
}
DEBUGLOG("Our local node id is %d\n", our_nodeid);
+ if (_lock_node_resource()) {
+ dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1);
+ cpg_finalize(cpg_handle);
+ quorum_finalize(quorum_handle);
+ return -1;
+ }
+
DEBUGLOG("Connected to Corosync\n");
return 0;
--
1.8.1.rc1.5.g7e0651a
reply other threads:[~2013-04-01 21:13 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130401211339.GB11525@redhat.com \
--to=teigland@redhat.com \
--cc=lvm-devel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.