public inbox for gfs2@lists.linux.dev
 help / color / mirror / Atom feed
* [PATCH dlm/next 1/5] dlm: new max length for debugfs entry name
@ 2024-11-13 16:46 Alexander Aring
  2024-11-13 16:46 ` [PATCH dlm/next 2/5] dlm: introduce lockspace control debugfs entry Alexander Aring
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Alexander Aring @ 2024-11-13 16:46 UTC (permalink / raw)
  To: teigland; +Cc: gfs2, aahringo

Since commit 16e98462b764 ("dlm: remove callback queue debugfs
functionality") the callback workqueue debugfs entry was being removed
again because lkb refcount issues with ongoing workers. Use the next
maximum name length for debugfs entry possible.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/debug_fs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 700a0cbb2f14..c76320acefc1 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -762,7 +762,7 @@ void dlm_delete_debug_comms_file(void *ctx)
 void dlm_create_debug_file(struct dlm_ls *ls)
 {
 	/* Reserve enough space for the longest file name */
-	char name[DLM_LOCKSPACE_LEN + sizeof("_queued_asts")];
+	char name[DLM_LOCKSPACE_LEN + sizeof("_waiters")];
 
 	/* format 1 */
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH dlm/next 2/5] dlm: introduce lockspace control debugfs entry
  2024-11-13 16:46 [PATCH dlm/next 1/5] dlm: new max length for debugfs entry name Alexander Aring
@ 2024-11-13 16:46 ` Alexander Aring
  2024-11-13 16:46 ` [PATCH dlm/next 3/5] dlm: extend debugfs to manipulate more DLM states Alexander Aring
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Alexander Aring @ 2024-11-13 16:46 UTC (permalink / raw)
  To: teigland; +Cc: gfs2, aahringo

To create nowadays a kernel lockspace we need to call the kernel API.
This will provide a debugfs entry to do so. It is named "__ctrl" as the
debugfs entries my collide with lockspace names. It still can but it is
more unlikely as it has a "__" prefix.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/debug_fs.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index c76320acefc1..49ad31120286 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -26,6 +26,7 @@ static struct mutex debug_buf_lock;
 
 static struct dentry *dlm_root;
 static struct dentry *dlm_comms;
+static struct dentry *dlm_ctrl;
 
 static char *print_lockmode(int mode)
 {
@@ -759,6 +760,36 @@ void dlm_delete_debug_comms_file(void *ctx)
 	debugfs_remove(ctx);
 }
 
+static ssize_t dlm_ctrl_write(struct file *fp, const char __user *user_buf,
+			      size_t count, loff_t *ppos)
+{
+	char clname[DLM_LOCKSPACE_LEN + 1] = {};
+	char lsname[DLM_LOCKSPACE_LEN + 1] = {};
+	dlm_lockspace_t *ls;
+	char buf[512] = {};
+	int rv, n;
+
+	if (copy_from_user(buf, user_buf,
+			   min_t(size_t, sizeof(buf) - 1, count)))
+		return -EFAULT;
+
+	n = sscanf(buf, "%" __stringify(DLM_LOCKSPACE_LEN) "s %" __stringify(DLM_LOCKSPACE_LEN) "s",
+		   lsname, clname);
+	if (n != 2)
+		return -EINVAL;
+
+	rv = dlm_new_lockspace(lsname, clname, 0, 0, NULL, NULL, NULL, &ls);
+	if (rv)
+		return rv;
+
+	return count;
+}
+
+static const struct file_operations dlm_ctrl_fops = {
+	.open	= simple_open,
+	.write	= dlm_ctrl_write,
+};
+
 void dlm_create_debug_file(struct dlm_ls *ls)
 {
 	/* Reserve enough space for the longest file name */
@@ -816,6 +847,8 @@ void __init dlm_register_debugfs(void)
 	mutex_init(&debug_buf_lock);
 	dlm_root = debugfs_create_dir("dlm", NULL);
 	dlm_comms = debugfs_create_dir("comms", dlm_root);
+	dlm_ctrl = debugfs_create_file("__ctrl", 0644, dlm_root, NULL,
+				       &dlm_ctrl_fops);
 }
 
 void dlm_unregister_debugfs(void)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH dlm/next 3/5] dlm: extend debugfs to manipulate more DLM states
  2024-11-13 16:46 [PATCH dlm/next 1/5] dlm: new max length for debugfs entry name Alexander Aring
  2024-11-13 16:46 ` [PATCH dlm/next 2/5] dlm: introduce lockspace control debugfs entry Alexander Aring
@ 2024-11-13 16:46 ` Alexander Aring
  2024-11-13 16:46 ` [PATCH dlm/next 4/5] dlm: fix middle conversion cases in DLM recovery Alexander Aring
  2024-11-13 16:46 ` [PATCH dlm/next 5/5] dlm: more debug info on invalid lock requests Alexander Aring
  3 siblings, 0 replies; 5+ messages in thread
From: Alexander Aring @ 2024-11-13 16:46 UTC (permalink / raw)
  To: teigland; +Cc: gfs2, aahringo

To reproduce more issues with the DLM recovery by using the DLM debugfs
developer backdoor to force manipulating the DLM lkb/rsb states.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/debug_fs.c     | 16 +++++++++-------
 fs/dlm/dlm_internal.h |  6 ++++++
 fs/dlm/lock.c         | 43 +++++++++++++++++++++++++++++++++++++++----
 fs/dlm/lock.h         |  4 +++-
 4 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 49ad31120286..54ad11e1cd2c 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -502,11 +502,11 @@ static int table_open2(struct inode *inode, struct file *file)
 static ssize_t table_write2(struct file *file, const char __user *user_buf,
 			    size_t count, loff_t *ppos)
 {
+	int n, len, lkb_status, error, from_nodeid, dir_nodeid, master_nodeid, rqmode, grmode;
 	struct seq_file *seq = file->private_data;
-	int n, len, lkb_nodeid, lkb_status, error;
 	char name[DLM_RESNAME_MAXLEN + 1] = {};
+	unsigned int lkb_dflags, lkb_iflags;
 	struct dlm_ls *ls = seq->private;
-	unsigned int lkb_flags;
 	char buf[256] = {};
 	uint32_t lkb_id;
 
@@ -514,14 +514,16 @@ static ssize_t table_write2(struct file *file, const char __user *user_buf,
 			   min_t(size_t, sizeof(buf) - 1, count)))
 		return -EFAULT;
 
-	n = sscanf(buf, "%x %" __stringify(DLM_RESNAME_MAXLEN) "s %x %d %d",
-		   &lkb_id, name, &lkb_flags, &lkb_nodeid, &lkb_status);
-	if (n != 5)
+	n = sscanf(buf, "%x %" __stringify(DLM_RESNAME_MAXLEN) "s %x %x %d %d %d %d %d %d",
+		   &lkb_id, name, &lkb_iflags, &lkb_dflags, &lkb_status, &from_nodeid,
+		   &dir_nodeid, &master_nodeid, &rqmode, &grmode);
+	if (n != 10)
 		return -EINVAL;
 
 	len = strnlen(name, DLM_RESNAME_MAXLEN);
-	error = dlm_debug_add_lkb(ls, lkb_id, name, len, lkb_flags,
-				  lkb_nodeid, lkb_status);
+	error = dlm_debug_add_lkb(ls, lkb_id, name, len, lkb_iflags, lkb_dflags,
+				  lkb_status, from_nodeid, dir_nodeid,
+				  master_nodeid, rqmode, grmode);
 	if (error)
 		return error;
 
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index d534a4bc162b..6ee6179e16a8 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -794,6 +794,12 @@ static inline void dlm_set_flags_val(unsigned long *addr, uint32_t val,
 	}
 }
 
+static inline void dlm_set_iflags_val(struct dlm_lkb *lkb, uint32_t val)
+{
+	dlm_set_flags_val(&lkb->lkb_iflags, val, __DLM_IFL_MIN_BIT,
+			  __DLM_IFL_MAX_BIT);
+}
+
 static inline void dlm_set_dflags_val(struct dlm_lkb *lkb, uint32_t val)
 {
 	dlm_set_flags_val(&lkb->lkb_dflags, val, __DLM_DFL_MIN_BIT,
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 966a926c301b..4219e0025fa2 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -6271,10 +6271,30 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 	return error;
 }
 
+static void debug_bastfn(void *astparam, int mode)
+{
+	const struct dlm_lkb *lkb = astparam;
+
+	log_print("%s: lkb_id 0x%x mode %u", __func__, lkb->lkb_id, mode);
+}
+
+static void debug_astfn(void *astparam)
+{
+	const struct dlm_lkb *lkb = astparam;
+	const struct dlm_lksb *lksb;
+
+	lksb = lkb->lkb_lksb;
+
+	log_print("%s: lkb_id 0x%x status %d", __func__, lkb->lkb_id, lksb->sb_status);
+}
+
 /* debug functionality */
 int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
-		      int lkb_nodeid, unsigned int lkb_dflags, int lkb_status)
+		      unsigned int lkb_iflags, unsigned int lkb_dflags,
+		      int lkb_status, int from_nodeid, int dir_nodeid,
+		      int master_nodeid, int rqmode, int grmode)
 {
+	int our_nodeid = dlm_our_nodeid();
 	struct dlm_lksb *lksb;
 	struct dlm_lkb *lkb;
 	struct dlm_rsb *r;
@@ -6294,14 +6314,18 @@ int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
 		return error;
 	}
 
+	dlm_set_iflags_val(lkb, lkb_iflags);
 	dlm_set_dflags_val(lkb, lkb_dflags);
-	lkb->lkb_nodeid = lkb_nodeid;
 	lkb->lkb_lksb = lksb;
+	lkb->lkb_rqmode = rqmode;
+	lkb->lkb_grmode = grmode;
+	lkb->lkb_astfn = debug_astfn;
+	lkb->lkb_bastfn = debug_bastfn;
 	/* user specific pointer, just don't have it NULL for kernel locks */
 	if (~lkb_dflags & BIT(DLM_DFL_USER_BIT))
-		lkb->lkb_astparam = (void *)0xDEADBEEF;
+		lkb->lkb_astparam = lkb;
 
-	error = find_rsb(ls, name, len, 0, R_REQUEST, &r);
+	error = find_rsb(ls, name, len, from_nodeid, R_REQUEST, &r);
 	if (error) {
 		kfree(lksb);
 		__put_lkb(ls, lkb);
@@ -6309,6 +6333,17 @@ int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
 	}
 
 	lock_rsb(r);
+	if (dir_nodeid)
+		r->res_dir_nodeid = dir_nodeid;
+
+	r->res_master_nodeid = master_nodeid;
+	/* stupid second meaning of res_master_nodeid */
+	if (master_nodeid == our_nodeid)
+		r->res_nodeid = 0;
+	else
+		r->res_nodeid = master_nodeid;
+	lkb->lkb_nodeid = r->res_nodeid;
+
 	attach_lkb(r, lkb);
 	add_lkb(r, lkb, lkb_status);
 	unlock_rsb(r);
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index b23d7b854ed4..66ba9688f9c4 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -60,7 +60,9 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
 int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
-		      int lkb_nodeid, unsigned int lkb_flags, int lkb_status);
+		      unsigned int lkb_iflags, unsigned int lkb_dflags,
+		      int lkb_status, int from_nodeid, int dir_nodeid,
+		      int master_nodeid, int rqmode, int grmode);
 int dlm_debug_add_lkb_to_waiters(struct dlm_ls *ls, uint32_t lkb_id,
 				 int mstype, int to_nodeid);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH dlm/next 4/5] dlm: fix middle conversion cases in DLM recovery
  2024-11-13 16:46 [PATCH dlm/next 1/5] dlm: new max length for debugfs entry name Alexander Aring
  2024-11-13 16:46 ` [PATCH dlm/next 2/5] dlm: introduce lockspace control debugfs entry Alexander Aring
  2024-11-13 16:46 ` [PATCH dlm/next 3/5] dlm: extend debugfs to manipulate more DLM states Alexander Aring
@ 2024-11-13 16:46 ` Alexander Aring
  2024-11-13 16:46 ` [PATCH dlm/next 5/5] dlm: more debug info on invalid lock requests Alexander Aring
  3 siblings, 0 replies; 5+ messages in thread
From: Alexander Aring @ 2024-11-13 16:46 UTC (permalink / raw)
  To: teigland; +Cc: gfs2, aahringo

We saw some issue after recovery and a following
dlm_lock(QUECVT | CONVERT) returned with -EINVAL. After investigation it
came out the lkb grmode was in an invalid state "-1" although the lkb was
in granted state.

I was able to reproduce the issue with the following DLM lkb/rsb state
manipulation:

steps to reproduce (3 nodes cluster):

Recovery conversion case "recover_convert_waiter()":

Node 1:
$ echo "test $CLNAME" > /sys/kernel/debug/dlm/__ctrl
$ echo "1337 foobaaar 0x0 0x0 3 3 2 2 2 3" >
/sys/kernel/debug/dlm/test_locks
$ echo "1337 2 2" > /sys/kernel/debug/dlm/test_waiters
Node 2:
$ echo "test $CLNAME" > /sys/kernel/debug/dlm/__ctrl
Node 3:
$ echo "test $CLNAME" > /sys/kernel/debug/dlm/__ctrl

`dlm_tool lockdebug test` before recovery dumps:
Node 1:
---
Resource len  8  "foobaaar"
Master:2         flags 000000c8 first_lkid 0 root 0 recover 0 locks 0
Convert
00001337 PR (CW) Master:   2 00000000  wait 02

Expecting reply
nodeid  2 msg convert lkid 00001337 resource "foobaaar"
---

Do a fence of node 2.

`dlm_tool lockdebug test` after recovery dumps:

---
Resource len  8  "foobaaar"
Master           flags 000000c8 first_lkid 0 root 0 recover 0 locks 0
Granted
00001337 IV
---

The lkb ends up in an invalid granted state with the lock mode invalid.
A following lock conversion with QUECVT will always return -EINVAL.

This invalid case is only appears when we have a waiter for a PR <-> CW
lock conversion that waits for an answer from the master lock node but
the master was being fenced. As this case is mainly handled in
"recover_convert_waiter()".

There are more cases to check but this is so far the only one I can
confirm that there are problems with and should be fixed.

If the above case occurs it leaves the lkb in a invalid state of
"grmode == rqmode" on the convertqueue that "recover_conversion()"
handles.

After this patch the above invalid state does not appear anymore. It
ends in the following lock state after fencing node 2 and recovery is
being triggered:

`dlm_tool lockdebug test` after recovery dumps:
Node 1:
---
Resource len  8  "foobaaar"
Master           flags 000000c8 first_lkid 0 root 0 recover 0 locks 0
Granted
00001337 CW
---

That successful indicates the new master is node 1 and it got the CW
lock state granted as requested before recovery.

There is however a second case handled when a new master is elected and
"receive_rcom_lock_args()" recovers lock requests from non master nodes
to the new master. The condition in "receive_rcom_lock_args()" of
"rl->rl_wait_type == cpu_to_le16(DLM_MSG_CONVERT)" however can't be true
as "recover_convert_waiter()" would call "_receive_convert_reply()" and
remove the current waiter of the lkb if there is one. That always would
have ended in "wait_type == 0". In case of a new master the new master
will never receive the lkb with "wait_type != 0".

I tried to find problems in the current handling of
"receive_rcom_lock_args()" case and I couldn't find any. However there is
a condition that seems never to be worked correctly.

This solution was co-developed with the original DLM author David
Teigland that changes this case as it covers potential all other non yet
easy to reproduce cases for PR <-> CW that are on granted/convertqueue.

Co-developed: David Teigland <teigland@redhat.com>
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/lock.c    | 20 ++++++++++++--------
 fs/dlm/recover.c | 40 +++++++++++++++++++++++++++-------------
 2 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 4219e0025fa2..1e809f591d1a 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -4999,16 +4999,19 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
 				   struct dlm_message *ms_local)
 {
 	if (middle_conversion(lkb)) {
+		log_rinfo(ls, "%s %x middle convert in progress", __func__,
+			 lkb->lkb_id);
+
+		/* We sent this lock to the new master. The new master will
+		 * tell us when it's granted.  We no longer need a reply, so
+		 * use a fake reply to put the lkb into the right state.
+		 */
 		hold_lkb(lkb);
 		memset(ms_local, 0, sizeof(struct dlm_message));
 		ms_local->m_type = cpu_to_le32(DLM_MSG_CONVERT_REPLY);
 		ms_local->m_result = cpu_to_le32(to_dlm_errno(-EINPROGRESS));
 		ms_local->m_header.h_nodeid = cpu_to_le32(lkb->lkb_nodeid);
 		_receive_convert_reply(lkb, ms_local, true);
-
-		/* Same special case as in receive_rcom_lock_args() */
-		lkb->lkb_grmode = DLM_LOCK_IV;
-		rsb_set_flag(lkb->lkb_resource, RSB_RECOVER_CONVERT);
 		unhold_lkb(lkb);
 
 	} else if (lkb->lkb_rqmode >= lkb->lkb_grmode) {
@@ -5555,10 +5558,11 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
 	   The real granted mode of these converting locks cannot be determined
 	   until all locks have been rebuilt on the rsb (recover_conversion) */
 
-	if (rl->rl_wait_type == cpu_to_le16(DLM_MSG_CONVERT) &&
-	    middle_conversion(lkb)) {
-		rl->rl_status = DLM_LKSTS_CONVERT;
-		lkb->lkb_grmode = DLM_LOCK_IV;
+	if (rl->rl_status == DLM_LKSTS_CONVERT && middle_conversion(lkb)) {
+		/* We may need to adjust grmode depending on other granted locks. */
+		log_limit(ls, "%s %x middle convert gr %d rq %d remote %d %x",
+			  __func__, lkb->lkb_id, lkb->lkb_grmode,
+			  lkb->lkb_rqmode, lkb->lkb_nodeid, lkb->lkb_remid);
 		rsb_set_flag(r, RSB_RECOVER_CONVERT);
 	}
 
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index 2e1169c81c6e..5a7b4b2016c9 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -811,33 +811,47 @@ static void recover_lvb(struct dlm_rsb *r)
 }
 
 /* All master rsb's flagged RECOVER_CONVERT need to be looked at.  The locks
-   converting PR->CW or CW->PR need to have their lkb_grmode set. */
+ * converting PR->CW or CW->PR may need to have their lkb_grmode changed.
+ */
 
 static void recover_conversion(struct dlm_rsb *r)
 {
 	struct dlm_ls *ls = r->res_ls;
+	uint32_t other_lkid = 0;
+	int other_grmode = -1;
 	struct dlm_lkb *lkb;
-	int grmode = -1;
 
 	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
 		if (lkb->lkb_grmode == DLM_LOCK_PR ||
 		    lkb->lkb_grmode == DLM_LOCK_CW) {
-			grmode = lkb->lkb_grmode;
+			other_grmode = lkb->lkb_grmode;
+			other_lkid = lkb->lkb_id;
 			break;
 		}
 	}
 
+	if (other_grmode == -1) {
+		log_limit(ls, "%s %x gr %d rq %d, remote %d %x, no granted PR/CW found",
+			  __func__, lkb->lkb_id, lkb->lkb_grmode,
+			  lkb->lkb_rqmode, lkb->lkb_nodeid,
+			  lkb->lkb_remid);
+		return;
+	}
+
 	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
-		if (lkb->lkb_grmode != DLM_LOCK_IV)
-			continue;
-		if (grmode == -1) {
-			log_debug(ls, "recover_conversion %x set gr to rq %d",
-				  lkb->lkb_id, lkb->lkb_rqmode);
-			lkb->lkb_grmode = lkb->lkb_rqmode;
-		} else {
-			log_debug(ls, "recover_conversion %x set gr %d",
-				  lkb->lkb_id, grmode);
-			lkb->lkb_grmode = grmode;
+		/* Lock recovery created incompatible granted modes, so
+		 * change the granted mode of the converting lock to
+		 * NL. The rqmode of the converting lock should be CW,
+		 * which means the converting lock should be granted at
+		 * the end of recovery.
+		 */
+		if (((lkb->lkb_grmode == DLM_LOCK_PR) && (other_grmode == DLM_LOCK_CW)) ||
+		    ((lkb->lkb_grmode == DLM_LOCK_CW) && (other_grmode == DLM_LOCK_PR))) {
+			log_limit(ls, "%s %x gr %d rq %d, remote %d %x, other_lkid %u, other gr %d, set gr=NL",
+				  __func__, lkb->lkb_id, lkb->lkb_grmode,
+				  lkb->lkb_rqmode, lkb->lkb_nodeid,
+				  lkb->lkb_remid, other_lkid, other_grmode);
+			lkb->lkb_grmode = DLM_LOCK_NL;
 		}
 	}
 }
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH dlm/next 5/5] dlm: more debug info on invalid lock requests
  2024-11-13 16:46 [PATCH dlm/next 1/5] dlm: new max length for debugfs entry name Alexander Aring
                   ` (2 preceding siblings ...)
  2024-11-13 16:46 ` [PATCH dlm/next 4/5] dlm: fix middle conversion cases in DLM recovery Alexander Aring
@ 2024-11-13 16:46 ` Alexander Aring
  3 siblings, 0 replies; 5+ messages in thread
From: Alexander Aring @ 2024-11-13 16:46 UTC (permalink / raw)
  To: teigland; +Cc: gfs2, aahringo

Recently there was an issue coming up about an invalid lkb state when a
convert was triggered. After investigation an recovery issue was the
root of the problem. However we only saw it because QUECVT flag was
being set that was leading DLM to return with -EINVAL on such invalid
state. To cover also such case without QUECVT is set we introduce more
conditions that are required to be checked before initial a DLM lock
conversion.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/lock.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 1e809f591d1a..7217cc566175 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -2830,6 +2830,11 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
 		if (test_bit(DLM_IFL_MSTCPY_BIT, &lkb->lkb_iflags))
 			goto out;
 
+		/* sanity check to do a conversion on a invalid lkb state */
+		if (lkb->lkb_grmode == DLM_LOCK_IV ||
+		    lkb->lkb_status != DLM_LKSTS_GRANTED)
+			goto out;
+
 		if (args->flags & DLM_LKF_QUECVT &&
 		    !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
 			goto out;
@@ -2852,14 +2857,16 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
 	case -EINVAL:
 		/* annoy the user because dlm usage is wrong */
 		WARN_ON(1);
-		log_error(ls, "%s %d %x %x %x %d %d", __func__,
+		log_error(ls, "%s %d %x %x %x %d %d %d %d", __func__,
 			  rv, lkb->lkb_id, dlm_iflags_val(lkb), args->flags,
-			  lkb->lkb_status, lkb->lkb_wait_type);
+			  lkb->lkb_status, lkb->lkb_wait_type,
+			  lkb->lkb_rqmode, lkb->lkb_grmode);
 		break;
 	default:
-		log_debug(ls, "%s %d %x %x %x %d %d", __func__,
+		log_debug(ls, "%s %d %x %x %x %d %d %d %d", __func__,
 			  rv, lkb->lkb_id, dlm_iflags_val(lkb), args->flags,
-			  lkb->lkb_status, lkb->lkb_wait_type);
+			  lkb->lkb_status, lkb->lkb_wait_type,
+			  lkb->lkb_rqmode, lkb->lkb_grmode);
 		break;
 	}
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-11-13 16:46 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-11-13 16:46 [PATCH dlm/next 1/5] dlm: new max length for debugfs entry name Alexander Aring
2024-11-13 16:46 ` [PATCH dlm/next 2/5] dlm: introduce lockspace control debugfs entry Alexander Aring
2024-11-13 16:46 ` [PATCH dlm/next 3/5] dlm: extend debugfs to manipulate more DLM states Alexander Aring
2024-11-13 16:46 ` [PATCH dlm/next 4/5] dlm: fix middle conversion cases in DLM recovery Alexander Aring
2024-11-13 16:46 ` [PATCH dlm/next 5/5] dlm: more debug info on invalid lock requests Alexander Aring

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox