* [Cluster-devel] [PATCHv2 v6.5-rc1 2/3] fs: dlm: introduce DLM_PLOCK_FL_NO_REPLY flag
2023-07-13 16:48 [Cluster-devel] [PATCHv2 v6.5-rc1 0/3] fs: dlm: workarounds and cancellation Alexander Aring
2023-07-13 16:48 ` [Cluster-devel] [PATCHv2 v6.5-rc1 1/3] fs: dlm: ignore DLM_PLOCK_FL_CLOSE flag results Alexander Aring
@ 2023-07-13 16:48 ` Alexander Aring
2023-07-13 16:48 ` [Cluster-devel] [PATCHv2 v6.5-rc1 3/3] fs: dlm: allow to F_SETLKW getting interrupted Alexander Aring
2 siblings, 0 replies; 5+ messages in thread
From: Alexander Aring @ 2023-07-13 16:48 UTC
To: cluster-devel.redhat.com
This patch introduces a new flag, DLM_PLOCK_FL_NO_REPLY, for dlm plock
operations that should never get a reply. Currently this case is partly
handled via DLM_PLOCK_FL_CLOSE, but DLM_PLOCK_FL_CLOSE carries an
additional meaning: it removes all waiters for a specific nodeid/owner
pair by doing an unlock operation. Moreover, the assumption that
DLM_PLOCK_FL_CLOSE never gets a reply does not hold for some user space
applications when an error occurs. The new DLM_PLOCK_FL_NO_REPLY flag
tells the user space application to never send a reply back. Since it
is now set in combination with DLM_PLOCK_FL_CLOSE, we can use
DLM_PLOCK_FL_NO_REPLY to detect older user space applications that
still reply and drop those messages. Because user space applications
are expected to simply copy the flags from the request into the reply,
DLM_PLOCK_FL_NO_REPLY can now also serve as a workaround to ignore
replies from older dlm_controld versions.
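For illustration, a minimal sketch of the expected user space behavior
follows; apart from struct dlm_plock_info and the DLM_PLOCK_FL_*
constants from the uapi header, all names here (handle_request,
process_plock, dev_fd) are made up for this example and are not actual
dlm_controld code:

#include <unistd.h>
#include <linux/dlm_plock.h>

int process_plock(const struct dlm_plock_info *req); /* hypothetical */

/* Copy the request into the reply so the flags come back unchanged,
 * and write no reply at all - not even on error - when the kernel set
 * DLM_PLOCK_FL_NO_REPLY on the request.
 */
static void handle_request(int dev_fd, const struct dlm_plock_info *req)
{
	struct dlm_plock_info reply = *req;

	reply.rv = process_plock(req);

	if (reply.flags & DLM_PLOCK_FL_NO_REPLY)
		return;

	write(dev_fd, &reply, sizeof(reply));
}

An older daemon without this check may still write a reply in its error
paths; because it copies the request flags, dev_write() can now detect
DLM_PLOCK_FL_NO_REPLY in such a reply and drop it.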
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
fs/dlm/plock.c | 16 ++++++++++------
include/uapi/linux/dlm_plock.h | 1 +
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 869595a995f7..1fa5b04d0298 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -96,7 +96,7 @@ static void do_unlock_close(const struct dlm_plock_info *info)
op->info.end = OFFSET_MAX;
op->info.owner = info->owner;
- op->info.flags |= DLM_PLOCK_FL_CLOSE;
+ op->info.flags |= (DLM_PLOCK_FL_CLOSE | DLM_PLOCK_FL_NO_REPLY);
send_op(op);
}
@@ -293,7 +293,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
op->info.owner = (__u64)(long) fl->fl_owner;
if (fl->fl_flags & FL_CLOSE) {
- op->info.flags |= DLM_PLOCK_FL_CLOSE;
+ op->info.flags |= (DLM_PLOCK_FL_CLOSE | DLM_PLOCK_FL_NO_REPLY);
send_op(op);
rv = 0;
goto out;
@@ -392,7 +392,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
op = list_first_entry(&send_list, struct plock_op, list);
- if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+ if (op->info.flags & DLM_PLOCK_FL_NO_REPLY)
list_del(&op->list);
else
list_move_tail(&op->list, &recv_list);
@@ -407,7 +407,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
that were generated by the vfs cleaning up for a close
(the process did not make an unlock call). */
- if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+ if (op->info.flags & DLM_PLOCK_FL_NO_REPLY)
dlm_release_plock_op(op);
if (copy_to_user(u, &info, sizeof(info)))
@@ -434,11 +434,15 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
return -EINVAL;
/* Some dlm user space software will send replies back,
- * even if DLM_PLOCK_FL_CLOSE is set e.g. if an error occur.
+ * even if DLM_PLOCK_FL_NO_REPLY is set, e.g. if an error
+ * occurs because the op is unknown, etc.
+ *
* We can't match them in recv_list because they were never
* be part of it.
+ *
+ * In the future, this handling could be removed.
*/
- if (info.flags & DLM_PLOCK_FL_CLOSE)
+ if (info.flags & DLM_PLOCK_FL_NO_REPLY)
return count;
/*
diff --git a/include/uapi/linux/dlm_plock.h b/include/uapi/linux/dlm_plock.h
index 63b6c1fd9169..8dfa272c929a 100644
--- a/include/uapi/linux/dlm_plock.h
+++ b/include/uapi/linux/dlm_plock.h
@@ -25,6 +25,7 @@ enum {
};
#define DLM_PLOCK_FL_CLOSE 1
+#define DLM_PLOCK_FL_NO_REPLY 2
struct dlm_plock_info {
__u32 version[3];
--
2.31.1
* [Cluster-devel] [PATCHv2 v6.5-rc1 3/3] fs: dlm: allow to F_SETLKW getting interrupted
2023-07-13 16:48 [Cluster-devel] [PATCHv2 v6.5-rc1 0/3] fs: dlm: workarounds and cancellation Alexander Aring
2023-07-13 16:48 ` [Cluster-devel] [PATCHv2 v6.5-rc1 1/3] fs: dlm: ignore DLM_PLOCK_FL_CLOSE flag results Alexander Aring
2023-07-13 16:48 ` [Cluster-devel] [PATCHv2 v6.5-rc1 2/3] fs: dlm: introduce DLM_PLOCK_FL_NO_REPLY flag Alexander Aring
@ 2023-07-13 16:48 ` Alexander Aring
2023-07-14 23:40 ` Alexander Aring
2 siblings, 1 reply; 5+ messages in thread
From: Alexander Aring @ 2023-07-13 16:48 UTC
To: cluster-devel.redhat.com
This patch implements the dlm plock F_SETLKW interruption feature. If
a pending plock operation has not been sent to user space yet, it can
simply be dropped from the send_list. If it has already been sent, we
need to ask the dlm user space tool to remove the waiter. If that
succeeds, a reply with optype DLM_PLOCK_OP_CANCEL instead of
DLM_PLOCK_OP_LOCK comes back (user space clears the
DLM_PLOCK_FL_NO_REPLY flag for this reply) to signal that the
cancellation was successful. If a result with optype DLM_PLOCK_OP_LOCK
comes back, the cancellation was not successful.
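For illustration, a rough sketch of the matching user space side
follows; remove_waiter() and the surrounding names are hypothetical
stand-ins, not the actual dlm_controld code:

#include <unistd.h>
#include <linux/dlm_plock.h>

int remove_waiter(const struct dlm_plock_info *req); /* hypothetical */

/* On DLM_PLOCK_OP_CANCEL: if the waiter can still be removed, confirm
 * the cancellation by replying with optype DLM_PLOCK_OP_CANCEL and the
 * DLM_PLOCK_FL_NO_REPLY flag cleared. If not, stay silent; the pending
 * DLM_PLOCK_OP_LOCK result will reach the kernel instead.
 */
static void handle_cancel(int dev_fd, const struct dlm_plock_info *req)
{
	struct dlm_plock_info reply = *req;

	if (remove_waiter(req) == 0) {
		reply.flags &= ~DLM_PLOCK_FL_NO_REPLY;
		reply.rv = 0;
		write(dev_fd, &reply, sizeof(reply));
	}
}

Either way the kernel can tell the two outcomes apart in dev_write() by
the optype of the reply that matches the interrupted op.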
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
fs/dlm/plock.c | 91 ++++++++++++++++++++++++----------
include/uapi/linux/dlm_plock.h | 1 +
2 files changed, 66 insertions(+), 26 deletions(-)
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 1fa5b04d0298..0781a80c25f7 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -29,6 +29,9 @@ struct plock_async_data {
struct plock_op {
struct list_head list;
+#define DLM_PLOCK_OP_FLAG_SENT 0
+#define DLM_PLOCK_OP_FLAG_INTERRUPTED 1
+ unsigned long flags;
int done;
struct dlm_plock_info info;
/* if set indicates async handling */
@@ -74,30 +77,25 @@ static void send_op(struct plock_op *op)
wake_up(&send_wq);
}
-/* If a process was killed while waiting for the only plock on a file,
- locks_remove_posix will not see any lock on the file so it won't
- send an unlock-close to us to pass on to userspace to clean up the
- abandoned waiter. So, we have to insert the unlock-close when the
- lock call is interrupted. */
-
-static void do_unlock_close(const struct dlm_plock_info *info)
+static int do_lock_cancel(const struct plock_op *orig_op)
{
struct plock_op *op;
op = kzalloc(sizeof(*op), GFP_NOFS);
if (!op)
- return;
+ return -ENOMEM;
+
+ op->info = orig_op->info;
+ op->info.optype = DLM_PLOCK_OP_CANCEL;
+ op->info.flags = DLM_PLOCK_FL_NO_REPLY;
- op->info.optype = DLM_PLOCK_OP_UNLOCK;
- op->info.pid = info->pid;
- op->info.fsid = info->fsid;
- op->info.number = info->number;
- op->info.start = 0;
- op->info.end = OFFSET_MAX;
- op->info.owner = info->owner;
-
- op->info.flags |= (DLM_PLOCK_FL_CLOSE | DLM_PLOCK_FL_NO_REPLY);
send_op(op);
+ wait_event(recv_wq, (orig_op->done != 0));
+
+ if (orig_op->info.optype == DLM_PLOCK_OP_CANCEL)
+ return 0;
+
+ return 1;
}
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
@@ -156,7 +154,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
send_op(op);
if (op->info.wait) {
- rv = wait_event_killable(recv_wq, (op->done != 0));
+ rv = wait_event_interruptible(recv_wq, (op->done != 0));
if (rv == -ERESTARTSYS) {
spin_lock(&ops_lock);
/* recheck under ops_lock if we got a done != 0,
@@ -166,17 +164,37 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
spin_unlock(&ops_lock);
goto do_lock_wait;
}
- list_del(&op->list);
- spin_unlock(&ops_lock);
+
+ if (!test_bit(DLM_PLOCK_OP_FLAG_SENT, &op->flags)) {
+ /* part of send_list, user never saw the op */
+ list_del(&op->list);
+ spin_unlock(&ops_lock);
+ rv = -EINTR;
+ } else {
+ set_bit(DLM_PLOCK_OP_FLAG_INTERRUPTED, &op->flags);
+ spin_unlock(&ops_lock);
+ rv = do_lock_cancel(op);
+ switch (rv) {
+ case 0:
+ rv = -EINTR;
+ break;
+ case 1:
+ /* cancellation wasn't successful but op is done */
+ goto do_lock_wait;
+ default:
+ /* internal error doing cancel, we need to wait */
+ goto wait;
+ }
+ }
log_debug(ls, "%s: wait interrupted %x %llx pid %d",
__func__, ls->ls_global_id,
(unsigned long long)number, op->info.pid);
- do_unlock_close(&op->info);
dlm_release_plock_op(op);
goto out;
}
} else {
+wait:
wait_event(recv_wq, (op->done != 0));
}
@@ -392,10 +410,12 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
op = list_first_entry(&send_list, struct plock_op, list);
- if (op->info.flags & DLM_PLOCK_FL_NO_REPLY)
+ if (op->info.flags & DLM_PLOCK_FL_NO_REPLY) {
list_del(&op->list);
- else
+ } else {
list_move_tail(&op->list, &recv_list);
+ set_bit(DLM_PLOCK_OP_FLAG_SENT, &op->flags);
+ }
memcpy(&info, &op->info, sizeof(info));
}
spin_unlock(&ops_lock);
@@ -454,6 +474,27 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
spin_lock(&ops_lock);
if (info.wait) {
list_for_each_entry(iter, &recv_list, list) {
+ /* A very specific posix lock case allows two lock
+ * requests with the same meaning when threads are
+ * used. Such requests make no sense from the
+ * application's point of view, but they are possible.
+ * We need to check the interrupted state because we
+ * need to know which instance was interrupted when
+ * two or more of the "same" lock requests are in
+ * waiting state and got interrupted.
+ *
+ * TODO: we should move to an instance reference in
+ * request and reply instead of matching lock states.
+ * So far, matching lock states to find the instance
+ * has not caused any problems, but it would be much
+ * cleaner not to have to reason about all the special
+ * cases in which states and instances no longer have
+ * a 1:1 mapping.
+ */
+ if (info.optype == DLM_PLOCK_OP_CANCEL &&
+ !test_bit(DLM_PLOCK_OP_FLAG_INTERRUPTED, &iter->flags))
+ continue;
+
if (iter->info.fsid == info.fsid &&
iter->info.number == info.number &&
iter->info.owner == info.owner &&
@@ -477,9 +518,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
if (op) {
/* Sanity check that op and info match. */
- if (info.wait)
- WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
- else
+ if (!info.wait)
WARN_ON(op->info.fsid != info.fsid ||
op->info.number != info.number ||
op->info.owner != info.owner ||
diff --git a/include/uapi/linux/dlm_plock.h b/include/uapi/linux/dlm_plock.h
index 8dfa272c929a..9c4c083c824a 100644
--- a/include/uapi/linux/dlm_plock.h
+++ b/include/uapi/linux/dlm_plock.h
@@ -22,6 +22,7 @@ enum {
DLM_PLOCK_OP_LOCK = 1,
DLM_PLOCK_OP_UNLOCK,
DLM_PLOCK_OP_GET,
+ DLM_PLOCK_OP_CANCEL,
};
#define DLM_PLOCK_FL_CLOSE 1
--
2.31.1