linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Miklos Szeredi <mszeredi@redhat.com>
To: linux-fsdevel@vger.kernel.org
Cc: Jan Kara <jack@suse.cz>, Amir Goldstein <amir73il@gmail.com>
Subject: [PATCH v2] fanotify: add watchdog for permission events
Date: Tue,  9 Sep 2025 16:30:47 +0200	[thread overview]
Message-ID: <20250909143053.112171-1-mszeredi@redhat.com> (raw)

This is to make it easier to debug issues with AV software, which time and
again deadlocks with no indication of where the issue comes from, and the
kernel being blamed for the deadlock.  Then we need to analyze dumps to
prove that the kernel is not in fact at fault.

The deadlock comes from recursion: handling the event triggers another
permission event, in some roundabout way, obviously, otherwise it would
have been found in testing.

With this patch a warning is printed when a permission event has been
received by userspace but not answered for more than the timeout (in
seconds) specified in /proc/sys/fs/fanotify/watchdog_timeout.  The watchdog
can be turned off by setting the timeout to zero (which is the default).

The timeout is very coarse (T <= t < 2T), because an event is only reported
on the second periodic scan that finds it still pending, but I guess it's
good enough for the purpose.

Overhead should be minimal.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
v2:
	- removed config option
	- rename pid to recv_pid
	- remove from union
	- add sysctl
	- prevent race for list_empty check

 fs/notify/fanotify/fanotify.h      |  4 +-
 fs/notify/fanotify/fanotify_user.c | 95 ++++++++++++++++++++++++++++++
 include/linux/fsnotify_backend.h   |  2 +
 3 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index b78308975082..1a007e211bae 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -437,11 +437,13 @@ FANOTIFY_ME(struct fanotify_event *event)
 struct fanotify_perm_event {
 	struct fanotify_event fae;
 	struct path path;
-	const loff_t *ppos;		/* optional file range info */
+	const loff_t *ppos;	/* optional file range info */
 	size_t count;
 	u32 response;			/* userspace answer to the event */
 	unsigned short state;		/* state of the event */
+	unsigned short watchdog_cnt;	/* already scanned by watchdog? */
 	int fd;		/* fd we passed to userspace for this event */
+	pid_t recv_pid;	/* pid of task receiving the event */
 	union {
 		struct fanotify_response_info_header hdr;
 		struct fanotify_response_info_audit_rule audit_rule;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index b192ee068a7a..033333c90393 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -50,6 +50,7 @@
 
 /* configurable via /proc/sys/fs/fanotify/ */
 static int fanotify_max_queued_events __read_mostly;
+static int perm_group_timeout __read_mostly;
 
 #ifdef CONFIG_SYSCTL
 
@@ -85,6 +86,14 @@ static const struct ctl_table fanotify_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO
 	},
+	{
+		.procname	= "watchdog_timeout",
+		.data		= &perm_group_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
 };
 
 static void __init fanotify_sysctls_init(void)
@@ -95,6 +104,84 @@ static void __init fanotify_sysctls_init(void)
 #define fanotify_sysctls_init() do { } while (0)
 #endif /* CONFIG_SYSCTL */
 
+static LIST_HEAD(perm_group_list);	/* groups scanned by the watchdog */
+static DEFINE_SPINLOCK(perm_group_lock);	/* held for perm_group_list updates and scans */
+static void perm_group_watchdog(struct work_struct *work);
+static DECLARE_DELAYED_WORK(perm_group_work, perm_group_watchdog);
+/* Queue perm_group_work to run perm_group_timeout seconds from now. */
+static void perm_group_watchdog_schedule(void)
+{
+	schedule_delayed_work(&perm_group_work, secs_to_jiffies(perm_group_timeout));
+}
+
+static void perm_group_watchdog(struct work_struct *work)
+{
+	struct fsnotify_group *group;
+	struct fanotify_perm_event *event;
+	struct task_struct *task;
+	pid_t failed_pid = 0;
+	/* Work handler: report events still on an access_list the second time a scan sees them. */
+	guard(spinlock)(&perm_group_lock);
+	if (list_empty(&perm_group_list))
+		return;	/* no groups listed: return without re-arming */
+	/* NOTE(review): relies on watchdog_cnt being 0 for a fresh event; its initialisation is not in this patch - confirm. */
+	list_for_each_entry(group, &perm_group_list, fanotify_data.perm_group) {
+		/*
+		 * Unlocked peek is fine: an entry added concurrently is
+		 * simply picked up by the next scan.
+		 */
+		if (list_empty(&group->fanotify_data.access_list))
+			continue;
+
+		scoped_guard(spinlock, &group->notification_lock) {
+			list_for_each_entry(event, &group->fanotify_data.access_list, fae.fse.list) {
+				if (likely(event->watchdog_cnt == 0)) {
+					event->watchdog_cnt = 1;	/* first sighting: just mark it */
+				} else if (event->watchdog_cnt == 1) {
+					/* Second sighting: move to 2 so this event is never reported again */
+					event->watchdog_cnt = 2;
+
+					/* Skip if this pid is the one reported most recently in this scan */
+					if (event->recv_pid == failed_pid)
+						continue;
+
+					failed_pid = event->recv_pid;
+					rcu_read_lock();	/* task is only dereferenced inside this RCU section */
+					task = find_task_by_pid_ns(event->recv_pid, &init_pid_ns);
+					pr_warn_ratelimited("PID %u (%s) failed to respond to fanotify queue for more than %i seconds\n",
+							    event->recv_pid, task ? task->comm : NULL, perm_group_timeout);
+					rcu_read_unlock();
+				}
+			}
+		}
+	}
+	perm_group_watchdog_schedule();	/* NOTE(review): delay is 0 if the sysctl was cleared meanwhile - confirm intended */
+}
+
+/* Take @group off perm_group_list so the perm event watchdog can no longer scan it. */
+static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group)
+{
+	if (list_empty(&group->fanotify_data.perm_group))
+		return;		/* not on the watchdog list */
+	spin_lock(&perm_group_lock);
+	list_del_init(&group->fanotify_data.perm_group);	/* re-init so list_empty() on the node stays valid */
+	spin_unlock(&perm_group_lock);
+}
+
+/* Put @group on perm_group_list; no-op when the timeout is 0 or @group is already listed. */
+static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group)
+{
+	if (!perm_group_timeout)
+		return;
+	guard(spinlock)(&perm_group_lock);
+	if (!list_empty(&group->fanotify_data.perm_group))
+		return;		/* already monitored */
+	/* The first group on the list is what starts the watchdog. */
+	if (list_empty(&perm_group_list))
+		perm_group_watchdog_schedule();
+	list_add_tail(&group->fanotify_data.perm_group, &perm_group_list);
+}
+
 /*
  * All flags that may be specified in parameter event_f_flags of fanotify_init.
  *
@@ -953,6 +1040,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 				spin_lock(&group->notification_lock);
 				list_add_tail(&event->fse.list,
 					&group->fanotify_data.access_list);
+				FANOTIFY_PERM(event)->recv_pid = current->pid;
 				spin_unlock(&group->notification_lock);
 			}
 		}
@@ -1012,6 +1100,8 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	 */
 	fsnotify_group_stop_queueing(group);
 
+	fanotify_perm_watchdog_group_remove(group);
+
 	/*
 	 * Process all permission events on access_list and notification queue
 	 * and simulate reply from userspace.
@@ -1465,6 +1555,10 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 	fsnotify_group_unlock(group);
 
 	fsnotify_put_mark(fsn_mark);
+
+	if (!ret && (mask & FANOTIFY_PERM_EVENTS))
+		fanotify_perm_watchdog_group_add(group);
+
 	return ret;
 }
 
@@ -1625,6 +1719,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	group->fanotify_data.f_flags = event_f_flags;
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
+	INIT_LIST_HEAD(&group->fanotify_data.perm_group);
 	switch (class) {
 	case FAN_CLASS_NOTIF:
 		group->priority = FSNOTIFY_PRIO_NORMAL;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d4034ddaf392..7f7fe4f3aa34 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -273,6 +273,8 @@ struct fsnotify_group {
 			int f_flags; /* event_f_flags from fanotify_init() */
 			struct ucounts *ucounts;
 			mempool_t error_events_pool;
+			/* chained on perm_group_list */
+			struct list_head perm_group;
 		} fanotify_data;
 #endif /* CONFIG_FANOTIFY */
 	};
-- 
2.51.0


             reply	other threads:[~2025-09-09 14:30 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-09 14:30 Miklos Szeredi [this message]
2025-09-11 10:12 ` [PATCH v2] fanotify: add watchdog for permission events Jan Kara
2025-09-11 11:08   ` Miklos Szeredi
2025-09-11 11:25   ` Amir Goldstein

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250909143053.112171-1-mszeredi@redhat.com \
    --to=mszeredi@redhat.com \
    --cc=amir73il@gmail.com \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).