From: Peng Tao <bergwolf@gmail.com>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-kernel@vger.kernel.org,
Andriy Skulysh <Andriy_Skulysh@xyratex.com>,
Peng Tao <bergwolf@gmail.com>,
Andreas Dilger <andreas.dilger@intel.com>
Subject: [PATCH 40/40] staging/lustre/ptlrpc: flock deadlock detection does not work
Date: Fri, 15 Nov 2013 00:13:42 +0800 [thread overview]
Message-ID: <1384445622-12346-41-git-send-email-bergwolf@gmail.com> (raw)
In-Reply-To: <1384445622-12346-1-git-send-email-bergwolf@gmail.com>
From: Andriy Skulysh <Andriy_Skulysh@xyratex.com>
Flock deadlocks are checked on the first attempt to grant
the flock only. If we try again to grant it after its
blocking lock is cancelled, we don't check for deadlocks
which also may exist.
Perform deadlock detection during reprocess
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-1715
Lustre-change: http://review.whamcloud.com/3553
Signed-off-by: Andriy Skulysh <Andriy_Skulysh@xyratex.com>
Reviewed-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Reviewed-by: Bruce Korb <bruce_korb@xyratex.com>
Reviewed-by: Keith Mannthey <keith.mannthey@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Peng Tao <bergwolf@gmail.com>
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
---
.../lustre/lustre/include/lustre/lustre_idl.h | 5 ++-
.../lustre/lustre/include/lustre_dlm_flags.h | 14 +++++--
drivers/staging/lustre/lustre/ldlm/ldlm_flock.c | 41 ++++++++++++++++++--
drivers/staging/lustre/lustre/llite/llite_lib.c | 3 +-
.../lustre/lustre/obdclass/lprocfs_status.c | 1 +
drivers/staging/lustre/lustre/ptlrpc/wiretest.c | 2 +
6 files changed, 57 insertions(+), 9 deletions(-)
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 9d3bc29..7bdb24f 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -1292,6 +1292,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
#define OBD_CONNECT_LIGHTWEIGHT 0x1000000000000ULL/* lightweight connection */
#define OBD_CONNECT_SHORTIO 0x2000000000000ULL/* short io */
#define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */
+#define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* improved flock deadlock detection */
+
/* XXX README XXX:
* Please DO NOT add flag values here before first ensuring that this same
* flag value is not in use on some other branch. Please clear any such
@@ -1329,7 +1331,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
OBD_CONNECT_EINPROGRESS | \
OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_UMASK | \
OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK |\
- OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE)
+ OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE |\
+ OBD_CONNECT_FLOCK_DEAD)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
index a632217..855e18f 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
@@ -35,10 +35,10 @@
#ifndef LDLM_ALL_FLAGS_MASK
/** l_flags bits marked as "all_flags" bits */
-#define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F132FULL
+#define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F932FULL
/** l_flags bits marked as "ast" bits */
-#define LDLM_FL_AST_MASK 0x0000000080000000ULL
+#define LDLM_FL_AST_MASK 0x0000000080008000ULL
/** l_flags bits marked as "blocked" bits */
#define LDLM_FL_BLOCKED_MASK 0x000000000000000EULL
@@ -56,7 +56,7 @@
#define LDLM_FL_LOCAL_ONLY_MASK 0x00FFFFFF00000000ULL
/** l_flags bits marked as "on_wire" bits */
-#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F132FULL
+#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F932FULL
/** extent, mode, or resource changed */
#define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL // bit 0
@@ -114,6 +114,12 @@
#define ldlm_set_has_intent(_l) LDLM_SET_FLAG(( _l), 1ULL << 12)
#define ldlm_clear_has_intent(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 12)
+/** flock deadlock detected */
+#define LDLM_FL_FLOCK_DEADLOCK 0x0000000000008000ULL // bit 15
+#define ldlm_is_flock_deadlock(_l) LDLM_TEST_FLAG(( _l), 1ULL << 15)
+#define ldlm_set_flock_deadlock(_l) LDLM_SET_FLAG(( _l), 1ULL << 15)
+#define ldlm_clear_flock_deadlock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 15)
+
/** discard (no writeback) on cancel */
#define LDLM_FL_DISCARD_DATA 0x0000000000010000ULL // bit 16
#define ldlm_is_discard_data(_l) LDLM_TEST_FLAG(( _l), 1ULL << 16)
@@ -390,6 +396,7 @@ static int hf_lustre_ldlm_fl_ast_sent = -1;
static int hf_lustre_ldlm_fl_replay = -1;
static int hf_lustre_ldlm_fl_intent_only = -1;
static int hf_lustre_ldlm_fl_has_intent = -1;
+static int hf_lustre_ldlm_fl_flock_deadlock = -1;
static int hf_lustre_ldlm_fl_discard_data = -1;
static int hf_lustre_ldlm_fl_no_timeout = -1;
static int hf_lustre_ldlm_fl_block_nowait = -1;
@@ -431,6 +438,7 @@ const value_string lustre_ldlm_flags_vals[] = {
{LDLM_FL_REPLAY, "LDLM_FL_REPLAY"},
{LDLM_FL_INTENT_ONLY, "LDLM_FL_INTENT_ONLY"},
{LDLM_FL_HAS_INTENT, "LDLM_FL_HAS_INTENT"},
+ {LDLM_FL_FLOCK_DEADLOCK, "LDLM_FL_FLOCK_DEADLOCK"},
{LDLM_FL_DISCARD_DATA, "LDLM_FL_DISCARD_DATA"},
{LDLM_FL_NO_TIMEOUT, "LDLM_FL_NO_TIMEOUT"},
{LDLM_FL_BLOCK_NOWAIT, "LDLM_FL_BLOCK_NOWAIT"},
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
index 182773b..6324eff 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
@@ -205,6 +205,26 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
return 0;
}
+static void ldlm_flock_cancel_on_deadlock(struct ldlm_lock *lock,
+ struct list_head *work_list)
+{
+ CDEBUG(D_INFO, "reprocess deadlock req=%p\n", lock);
+
+ if ((exp_connect_flags(lock->l_export) &
+ OBD_CONNECT_FLOCK_DEAD) == 0) {
+ CERROR("deadlock found, but client doesn't "
+ "support flock canceliation\n");
+ } else {
+ LASSERT(lock->l_completion_ast);
+ LASSERT((lock->l_flags & LDLM_FL_AST_SENT) == 0);
+ lock->l_flags |= LDLM_FL_AST_SENT | LDLM_FL_CANCEL_ON_BLOCK |
+ LDLM_FL_FLOCK_DEADLOCK;
+ ldlm_flock_blocking_unlink(lock);
+ ldlm_resource_unlink_lock(lock);
+ ldlm_add_ast_work_item(lock, NULL, work_list);
+ }
+}
+
/**
* Process a granting attempt for flock lock.
* Must be called under ns lock held.
@@ -272,6 +292,7 @@ reprocess:
}
}
} else {
+ int reprocess_failed = 0;
lockmode_verify(mode);
/* This loop determines if there are existing locks
@@ -293,8 +314,15 @@ reprocess:
if (!ldlm_flocks_overlap(lock, req))
continue;
- if (!first_enq)
- return LDLM_ITER_CONTINUE;
+ if (!first_enq) {
+ reprocess_failed = 1;
+ if (ldlm_flock_deadlock(req, lock)) {
+ ldlm_flock_cancel_on_deadlock(req,
+ work_list);
+ return LDLM_ITER_CONTINUE;
+ }
+ continue;
+ }
if (*flags & LDLM_FL_BLOCK_NOWAIT) {
ldlm_flock_destroy(req, mode, *flags);
@@ -330,6 +358,8 @@ reprocess:
*flags |= LDLM_FL_BLOCK_GRANTED;
return LDLM_ITER_STOP;
}
+ if (reprocess_failed)
+ return LDLM_ITER_CONTINUE;
}
if (*flags & LDLM_FL_TEST_LOCK) {
@@ -646,7 +676,10 @@ granted:
/* ldlm_lock_enqueue() has already placed lock on the granted list. */
list_del_init(&lock->l_res_link);
- if (flags & LDLM_FL_TEST_LOCK) {
+ if (lock->l_flags & LDLM_FL_FLOCK_DEADLOCK) {
+ LDLM_DEBUG(lock, "client-side enqueue deadlock received");
+ rc = -EDEADLK;
+ } else if (flags & LDLM_FL_TEST_LOCK) {
/* fcntl(F_GETLK) request */
/* The old mode was saved in getlk->fl_type so that if the mode
* in the lock changes we can decref the appropriate refcount.*/
@@ -675,7 +708,7 @@ granted:
ldlm_process_flock_lock(lock, &noreproc, 1, &err, NULL);
}
unlock_res_and_lock(lock);
- return 0;
+ return rc;
}
EXPORT_SYMBOL(ldlm_flock_completion_ast);
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 93c67e5..18e3db2 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -205,7 +205,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
OBD_CONNECT_EINPROGRESS |
OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS |
- OBD_CONNECT_MAX_EASIZE;
+ OBD_CONNECT_MAX_EASIZE |
+ OBD_CONNECT_FLOCK_DEAD;
if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
data->ocd_connect_flags |= OBD_CONNECT_SOM;
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
index 9008861..3329055 100644
--- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
+++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
@@ -531,6 +531,7 @@ static const char *obd_connect_names[] = {
"lightweight_conn",
"short_io",
"pingless",
+ "flock_deadlock",
"unknown",
NULL
};
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
index 6c31947..f7af706 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
@@ -1153,6 +1153,8 @@ void lustre_assert_wire_constants(void)
OBD_CONNECT_SHORTIO);
LASSERTF(OBD_CONNECT_PINGLESS == 0x4000000000000ULL, "found 0x%.16llxULL\n",
OBD_CONNECT_PINGLESS);
+ LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_FLOCK_DEAD);
LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
(unsigned)OBD_CKSUM_CRC32);
LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
--
1.7.9.5
next prev parent reply other threads:[~2013-11-14 16:18 UTC|newest]
Thread overview: 67+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-11-14 16:13 [PATCH 00/40] staging/lustre: patch bomb 1 Peng Tao
2013-11-14 16:13 ` [PATCH 01/40] staging/lustre/llite: restore ll_fiemap Peng Tao
2013-11-14 16:13 ` [PATCH 02/40] staging/lustre: remove lu_target.h Peng Tao
2013-11-14 16:13 ` [PATCH 03/40] staging/lustre: remove llog_server.c Peng Tao
2013-11-14 16:13 ` [PATCH 04/40] staging/lustre/llite: Access to released file trigs a restore Peng Tao
2013-11-15 4:09 ` Greg Kroah-Hartman
2013-11-15 9:55 ` Peng Tao
2013-11-16 10:36 ` Dilger, Andreas
2013-11-16 19:59 ` Greg Kroah-Hartman
2013-11-18 3:07 ` Peng Tao
2013-11-18 4:39 ` Greg Kroah-Hartman
2013-11-18 6:07 ` Peng Tao
2013-11-18 13:52 ` Greg Kroah-Hartman
2013-11-19 13:29 ` Peng Tao
2013-11-14 16:13 ` [PATCH 05/40] staging/lustre: validate open handle cookies Peng Tao
2013-11-15 4:13 ` Greg Kroah-Hartman
2013-11-15 10:22 ` Peng Tao
2013-11-15 20:57 ` Greg Kroah-Hartman
2013-11-16 11:20 ` Dilger, Andreas
2013-11-16 19:50 ` Greg Kroah-Hartman
2013-11-18 2:36 ` Peng Tao
2013-11-18 4:35 ` Greg Kroah-Hartman
2013-11-18 6:18 ` Peng Tao
2013-11-18 8:57 ` Dilger, Andreas
2013-11-14 16:13 ` [PATCH 06/40] staging/lustre/llite: use correct FID in ll_och_fill() Peng Tao
2013-11-14 16:13 ` [PATCH 07/40] staging/lustre/hsm: Implementation of exclusive open Peng Tao
2013-11-15 4:17 ` Greg Kroah-Hartman
2013-11-15 10:26 ` Peng Tao
2013-11-14 16:13 ` [PATCH 08/40] staging/lustre/lnet: Fix assert on empty group in selftest module Peng Tao
2013-11-14 16:13 ` [PATCH 09/40] staging/lustre/ldlm: fix resource/fid check, use DLDLMRES Peng Tao
2013-11-14 16:13 ` [PATCH 10/40] staging/lustre/server: use unified request handler for MGS Peng Tao
2013-11-14 16:13 ` [PATCH 11/40] staging/lustre/llog: MGC to use OSD API for backup logs Peng Tao
2013-11-14 16:13 ` [PATCH 12/40] staging/lustre/nfs: writing to new files will return ENOENT Peng Tao
2013-11-14 16:22 ` Cheng Shao
2013-11-14 16:28 ` Peng Tao
2013-11-15 4:01 ` Greg Kroah-Hartman
2013-11-14 16:13 ` [PATCH 13/40] staging/lustre/autoconf: remove vectored fops tests Peng Tao
2013-11-14 16:13 ` [PATCH 14/40] staging/lustre/autoconf: remove LIBCFS_HAVE_IS_COMPAT_TASK test Peng Tao
2013-11-14 16:13 ` [PATCH 15/40] staging/lustre/llog: fix return value of llog_alloc_handle Peng Tao
2013-11-14 16:13 ` [PATCH 16/40] staging/lustre/lov: convert magic to host-endian in lov_dump_lmm() Peng Tao
2013-11-14 16:13 ` [PATCH 17/40] staging/lustre/ptlrpc: Fix race during exp_flock_hash creation Peng Tao
2013-11-14 16:13 ` [PATCH 18/40] staging/lustre/mdc: prevent fall through in mdc_iocontrol() Peng Tao
2013-11-14 16:13 ` [PATCH 19/40] staging/lustre/lu: shrink lu_object by 8 bytes on x86_64 Peng Tao
2013-11-14 16:13 ` [PATCH 20/40] staging/lustre/mdt: HSM coordinator client interface Peng Tao
2013-11-14 16:13 ` [PATCH 21/40] staging/lustre/mdt: HSM coordinator agent interface Peng Tao
2013-11-14 16:13 ` [PATCH 22/40] staging/lustre/scrub: OI scrub on OST Peng Tao
2013-11-14 16:13 ` [PATCH 23/40] staging/lustre/scrub: control OI scrub on OST from user space Peng Tao
2013-11-14 16:13 ` [PATCH 24/40] staging/lustre/llite: don't check for O_CREAT in it_create_mode Peng Tao
2013-11-14 16:13 ` [PATCH 25/40] staging/lustre/build: clean up unused variables and dead code Peng Tao
2013-11-14 16:13 ` [PATCH 26/40] staging/lustre/build: fix compilation issue with is_compat_task Peng Tao
2013-11-14 16:13 ` [PATCH 27/40] staging/lustre/ptlrpc: Fix a crash when dereferencing NULL pointer Peng Tao
2013-11-14 16:13 ` [PATCH 28/40] staging/lustre/hsm: Add hsm_release feature Peng Tao
2013-11-14 16:13 ` [PATCH 29/40] staging/lustre/llite: extended attribute cache Peng Tao
2013-11-14 16:13 ` [PATCH 30/40] staging/lustre/xattr: separate ACL and XATTR caches Peng Tao
2013-11-14 16:13 ` [PATCH 31/40] staging/lustre/lnet: Add LNet Router Priority parameter Peng Tao
2013-11-14 16:13 ` [PATCH 32/40] staging/lustre/api: HSM import uses new released pattern Peng Tao
2013-11-14 16:13 ` [PATCH 33/40] staging/lustre/target: move OUT to the unified target code Peng Tao
2013-11-14 16:13 ` [PATCH 34/40] staging/lustre/seq: unified SEQ handler Peng Tao
2013-11-14 16:13 ` [PATCH 35/40] staging/lustre/llite: remove ll_d_root_ops Peng Tao
2013-11-14 16:13 ` [PATCH 36/40] staging/lustre/llite: pass correct pointer to obd_iocontrol() Peng Tao
2013-11-14 16:13 ` [PATCH 37/40] staging/lustre/idl: remove LASSERT/CLASSERT from lustre_idl.h Peng Tao
2013-11-14 16:13 ` [PATCH 38/40] staging/lustre/mgs: set_param -P option that sets value permanently Peng Tao
2013-11-14 16:13 ` [PATCH 39/40] staging/lustre/utils: HSM Posix CopyTool Peng Tao
2013-11-14 16:13 ` Peng Tao [this message]
2013-11-15 3:59 ` [PATCH 00/40] staging/lustre: patch bomb 1 Greg Kroah-Hartman
2013-11-15 9:51 ` Peng Tao
2013-11-15 20:54 ` Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1384445622-12346-41-git-send-email-bergwolf@gmail.com \
--to=bergwolf@gmail.com \
--cc=Andriy_Skulysh@xyratex.com \
--cc=andreas.dilger@intel.com \
--cc=gregkh@linuxfoundation.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).