[RFC PATCH] fanotify: add watchdog for permission events

linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [RFC PATCH] fanotify: add watchdog for permission events
@ 2025-07-03 13:05 Miklos Szeredi
  2025-07-03 15:42 ` Amir Goldstein
  2025-07-04  9:56 ` Jan Kara
  0 siblings, 2 replies; 6+ messages in thread
From: Miklos Szeredi @ 2025-07-03 13:05 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Jan Kara, Amir Goldstein, Ian Kent, Eric Sandeen

This is to make it easier to debug issues with AV software, which time and
again deadlocks with no indication of where the issue comes from, and the
kernel being blamed for the deadlock.  Then we need to analyze dumps to
prove that the kernel is not in fact at fault.

With this patch a warning is printed when permission event is received by
userspace but not answered for more than 20 seconds.

The timeout is very coarse (20-40s) but I guess it's good enough for the
purpose.

Overhead should be minimal.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/notify/fanotify/Kconfig         |   5 ++
 fs/notify/fanotify/fanotify.h      |   6 +-
 fs/notify/fanotify/fanotify_user.c | 102 +++++++++++++++++++++++++++++
 include/linux/fsnotify_backend.h   |   4 ++
 4 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index 0e36aaf379b7..eeb9c443254e 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -24,3 +24,8 @@ config FANOTIFY_ACCESS_PERMISSIONS
 	   hierarchical storage management systems.
 
 	   If unsure, say N.
+
+config FANOTIFY_PERM_WATCHDOG
+       bool "fanotify permission event watchdog"
+       depends on FANOTIFY_ACCESS_PERMISSIONS
+       default n
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index b44e70e44be6..8b60fbb9594f 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -438,10 +438,14 @@ FANOTIFY_ME(struct fanotify_event *event)
 struct fanotify_perm_event {
 	struct fanotify_event fae;
 	struct path path;
-	const loff_t *ppos;		/* optional file range info */
+	union {
+		const loff_t *ppos;	/* optional file range info */
+		pid_t pid;		/* pid of task processing the event */
+	};
 	size_t count;
 	u32 response;			/* userspace answer to the event */
 	unsigned short state;		/* state of the event */
+	unsigned short watchdog_cnt;	/* already scanned by watchdog? */
 	int fd;		/* fd we passed to userspace for this event */
 	union {
 		struct fanotify_response_info_header hdr;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 87f861e9004f..a9a34da2c864 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -95,6 +95,96 @@ static void __init fanotify_sysctls_init(void)
 #define fanotify_sysctls_init() do { } while (0)
 #endif /* CONFIG_SYSCTL */
 
+#ifdef CONFIG_FANOTIFY_PERM_WATCHDOG
+static LIST_HEAD(perm_group_list);
+static DEFINE_SPINLOCK(perm_group_lock);
+static void perm_group_watchdog(struct work_struct *work);
+static DECLARE_DELAYED_WORK(perm_group_work, perm_group_watchdog);
+static unsigned int perm_group_timeout = 20;
+
+static void perm_group_watchdog_schedule(void)
+{
+	schedule_delayed_work(&perm_group_work, secs_to_jiffies(perm_group_timeout));
+}
+
+static void perm_group_watchdog(struct work_struct *work)
+{
+	struct fsnotify_group *group;
+	struct fanotify_perm_event *event;
+	struct task_struct *task;
+	pid_t failed_pid = 0;
+
+	guard(spinlock)(&perm_group_lock);
+	if (list_empty(&perm_group_list))
+		return;
+
+	list_for_each_entry(group, &perm_group_list, fanotify_data.perm_group) {
+		/*
+		 * Ok to test without lock, racing with an addition is
+		 * fine, will deal with it next round
+		 */
+		if (list_empty(&group->fanotify_data.access_list))
+			continue;
+
+		scoped_guard(spinlock, &group->notification_lock) {
+			list_for_each_entry(event, &group->fanotify_data.access_list, fae.fse.list) {
+				if (likely(event->watchdog_cnt == 0)) {
+					event->watchdog_cnt = 1;
+				} else if (event->watchdog_cnt == 1) {
+					/* Report on event only once */
+					event->watchdog_cnt = 2;
+
+					/* Do not report same pid repeatedly */
+					if (event->pid == failed_pid)
+						continue;
+
+					failed_pid = event->pid;
+					rcu_read_lock();
+					task = find_task_by_pid_ns(event->pid, &init_pid_ns);
+					pr_warn_ratelimited("PID %u (%s) failed to respond to fanotify queue for more than %i seconds\n",
+							    event->pid, task ? task->comm : NULL, perm_group_timeout);
+					rcu_read_unlock();
+				}
+			}
+		}
+	}
+	perm_group_watchdog_schedule();
+}
+
+static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group)
+{
+	if (!list_empty(&group->fanotify_data.perm_group)) {
+		/* Perm event watchdog can no longer scan this group. */
+		spin_lock(&perm_group_lock);
+		list_del(&group->fanotify_data.perm_group);
+		spin_unlock(&perm_group_lock);
+	}
+}
+
+static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group)
+{
+	if (list_empty(&group->fanotify_data.perm_group)) {
+		/* Add to perm_group_list for monitoring by watchdog. */
+		spin_lock(&perm_group_lock);
+		if (list_empty(&perm_group_list))
+			perm_group_watchdog_schedule();
+		list_add_tail(&group->fanotify_data.perm_group, &perm_group_list);
+		spin_unlock(&perm_group_lock);
+	}
+}
+
+#else
+
+static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group)
+{
+}
+
+static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group)
+{
+}
+
+#endif
+
 /*
  * All flags that may be specified in parameter event_f_flags of fanotify_init.
  *
@@ -210,6 +300,8 @@ static void fanotify_unhash_event(struct fsnotify_group *group,
 	hlist_del_init(&event->merge_list);
 }
 
+
+
 /*
  * Get an fanotify notification event if one exists and is small
  * enough to fit in "count". Return an error pointer if the count
@@ -953,6 +1045,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 				spin_lock(&group->notification_lock);
 				list_add_tail(&event->fse.list,
 					&group->fanotify_data.access_list);
+				FANOTIFY_PERM(event)->pid = current->pid;
 				spin_unlock(&group->notification_lock);
 			}
 		}
@@ -1012,6 +1105,8 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	 */
 	fsnotify_group_stop_queueing(group);
 
+	fanotify_perm_watchdog_group_remove(group);
+
 	/*
 	 * Process all permission events on access_list and notification queue
 	 * and simulate reply from userspace.
@@ -1464,6 +1559,10 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 	fsnotify_group_unlock(group);
 
 	fsnotify_put_mark(fsn_mark);
+
+	if (!ret && (mask & FANOTIFY_PERM_EVENTS))
+		fanotify_perm_watchdog_group_add(group);
+
 	return ret;
 }
 
@@ -1622,6 +1721,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	group->fanotify_data.f_flags = event_f_flags;
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
+#ifdef CONFIG_FANOTIFY_PERM_WATCHDOG
+	INIT_LIST_HEAD(&group->fanotify_data.perm_group);
+#endif
 	switch (class) {
 	case FAN_CLASS_NOTIF:
 		group->priority = FSNOTIFY_PRIO_NORMAL;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index fc27b53c58c2..5276fb3ada34 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -272,6 +272,10 @@ struct fsnotify_group {
 			int f_flags; /* event_f_flags from fanotify_init() */
 			struct ucounts *ucounts;
 			mempool_t error_events_pool;
+#ifdef CONFIG_FANOTIFY_PERM_WATCHDOG
+			/* chained on perm_group_list */
+			struct list_head perm_group;
+#endif
 		} fanotify_data;
 #endif /* CONFIG_FANOTIFY */
 	};
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] fanotify: add watchdog for permission events
  2025-07-03 13:05 [RFC PATCH] fanotify: add watchdog for permission events Miklos Szeredi
@ 2025-07-03 15:42 ` Amir Goldstein
  2025-07-03 16:22   ` Miklos Szeredi
  2025-07-04  9:56 ` Jan Kara
  1 sibling, 1 reply; 6+ messages in thread
From: Amir Goldstein @ 2025-07-03 15:42 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, Jan Kara, Ian Kent, Eric Sandeen

On Thu, Jul 3, 2025 at 3:05 PM Miklos Szeredi <mszeredi@redhat.com> wrote:
>
> This is to make it easier to debug issues with AV software, which time and
> again deadlocks with no indication of where the issue comes from, and the
> kernel being blamed for the deadlock.  Then we need to analyze dumps to
> prove that the kernel is not in fact at fault.

Interesting.
Do you mean deadlock in userspace or deadlock on some kernel
lock because the server is operating on the filesystem and the
permission event was in a locked context?

>
> With this patch a warning is printed when permission event is received by
> userspace but not answered for more than 20 seconds.
>

There is a nuance here.
Your patch does not count the time from when the operation was queued
and blocked.

It counts the time from when the AV software *reads* the event.
If AV software went off to take a nap and does not read events,
you will not get a watchdog.

Is that good enough to catch the usual offenders?
That relates to my question above whether the deadlock
is inherently because of doing some fs work in the context
of handling a permission event.

If it is, then I wonder if you could share some details about the
analysis of the deadlocks.

Reason I am asking is because when working on pre-content
events, as a side effect, because they share the same hook
with FAN_ACCESS_PERM, the latter event should not be emitted
in the context of fs locks (unless I missed something).

When auditing FAN_OPEN_PERM I recall only one context that
seemed like a potential deadlock trap, which is the permission
hook called from finish_open() in atomic_open() in the context of
 inode_lock{,_shared}(dir->d_inode);

So are the deadlocks that you found happen on fs with atomic_open()?
Technically, we can release the dir inode lock in finish_open()
I think it's just a matter of code architecture to make this happen.

> The timeout is very coarse (20-40s) but I guess it's good enough for the
> purpose.
>
> Overhead should be minimal.
>
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  fs/notify/fanotify/Kconfig         |   5 ++
>  fs/notify/fanotify/fanotify.h      |   6 +-
>  fs/notify/fanotify/fanotify_user.c | 102 +++++++++++++++++++++++++++++
>  include/linux/fsnotify_backend.h   |   4 ++
>  4 files changed, 116 insertions(+), 1 deletion(-)
>
> diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
> index 0e36aaf379b7..eeb9c443254e 100644
> --- a/fs/notify/fanotify/Kconfig
> +++ b/fs/notify/fanotify/Kconfig
> @@ -24,3 +24,8 @@ config FANOTIFY_ACCESS_PERMISSIONS
>            hierarchical storage management systems.
>
>            If unsure, say N.
> +
> +config FANOTIFY_PERM_WATCHDOG
> +       bool "fanotify permission event watchdog"
> +       depends on FANOTIFY_ACCESS_PERMISSIONS
> +       default n

I think that's one of those features where sysctl knob is more useful to
distros and admin than Kconfig.
Might as well be a sysctl knob to control perm_group_timeout
where 0 means off.

> diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> index b44e70e44be6..8b60fbb9594f 100644
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -438,10 +438,14 @@ FANOTIFY_ME(struct fanotify_event *event)
>  struct fanotify_perm_event {
>         struct fanotify_event fae;
>         struct path path;
> -       const loff_t *ppos;             /* optional file range info */
> +       union {
> +               const loff_t *ppos;     /* optional file range info */
> +               pid_t pid;              /* pid of task processing the event */

It is a bit odd to have a pid_t pid field here as well as
struct pid *pid field in fae.pid (the event generating pid).
So I think, either reuse fae.pid to keep reference to reader task_pid
or leave this pid_t field here with a more specific name.

> +       };
>         size_t count;
>         u32 response;                   /* userspace answer to the event */
>         unsigned short state;           /* state of the event */
> +       unsigned short watchdog_cnt;    /* already scanned by watchdog? */
>         int fd;         /* fd we passed to userspace for this event */
>         union {
>                 struct fanotify_response_info_header hdr;
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index 87f861e9004f..a9a34da2c864 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -95,6 +95,96 @@ static void __init fanotify_sysctls_init(void)
>  #define fanotify_sysctls_init() do { } while (0)
>  #endif /* CONFIG_SYSCTL */
>
> +#ifdef CONFIG_FANOTIFY_PERM_WATCHDOG
> +static LIST_HEAD(perm_group_list);
> +static DEFINE_SPINLOCK(perm_group_lock);
> +static void perm_group_watchdog(struct work_struct *work);
> +static DECLARE_DELAYED_WORK(perm_group_work, perm_group_watchdog);
> +static unsigned int perm_group_timeout = 20;
> +
> +static void perm_group_watchdog_schedule(void)
> +{
> +       schedule_delayed_work(&perm_group_work, secs_to_jiffies(perm_group_timeout));
> +}
> +
> +static void perm_group_watchdog(struct work_struct *work)
> +{
> +       struct fsnotify_group *group;
> +       struct fanotify_perm_event *event;
> +       struct task_struct *task;
> +       pid_t failed_pid = 0;
> +
> +       guard(spinlock)(&perm_group_lock);
> +       if (list_empty(&perm_group_list))
> +               return;
> +
> +       list_for_each_entry(group, &perm_group_list, fanotify_data.perm_group) {
> +               /*
> +                * Ok to test without lock, racing with an addition is
> +                * fine, will deal with it next round
> +                */
> +               if (list_empty(&group->fanotify_data.access_list))
> +                       continue;
> +
> +               scoped_guard(spinlock, &group->notification_lock) {
> +                       list_for_each_entry(event, &group->fanotify_data.access_list, fae.fse.list) {
> +                               if (likely(event->watchdog_cnt == 0)) {
> +                                       event->watchdog_cnt = 1;
> +                               } else if (event->watchdog_cnt == 1) {
> +                                       /* Report on event only once */
> +                                       event->watchdog_cnt = 2;
> +
> +                                       /* Do not report same pid repeatedly */
> +                                       if (event->pid == failed_pid)
> +                                               continue;
> +
> +                                       failed_pid = event->pid;
> +                                       rcu_read_lock();
> +                                       task = find_task_by_pid_ns(event->pid, &init_pid_ns);
> +                                       pr_warn_ratelimited("PID %u (%s) failed to respond to fanotify queue for more than %i seconds\n",
> +                                                           event->pid, task ? task->comm : NULL, perm_group_timeout);
> +                                       rcu_read_unlock();
> +                               }
> +                       }
> +               }
> +       }
> +       perm_group_watchdog_schedule();
> +}
> +
> +static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group)
> +{
> +       if (!list_empty(&group->fanotify_data.perm_group)) {
> +               /* Perm event watchdog can no longer scan this group. */
> +               spin_lock(&perm_group_lock);
> +               list_del(&group->fanotify_data.perm_group);
> +               spin_unlock(&perm_group_lock);
> +       }
> +}
> +
> +static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group)
> +{
> +       if (list_empty(&group->fanotify_data.perm_group)) {
> +               /* Add to perm_group_list for monitoring by watchdog. */
> +               spin_lock(&perm_group_lock);
> +               if (list_empty(&perm_group_list))
> +                       perm_group_watchdog_schedule();
> +               list_add_tail(&group->fanotify_data.perm_group, &perm_group_list);
> +               spin_unlock(&perm_group_lock);
> +       }
> +}
> +
> +#else
> +
> +static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group)
> +{
> +}
> +
> +static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group)
> +{
> +}
> +
> +#endif
> +
>  /*
>   * All flags that may be specified in parameter event_f_flags of fanotify_init.
>   *
> @@ -210,6 +300,8 @@ static void fanotify_unhash_event(struct fsnotify_group *group,
>         hlist_del_init(&event->merge_list);
>  }
>
> +
> +
>  /*
>   * Get an fanotify notification event if one exists and is small
>   * enough to fit in "count". Return an error pointer if the count
> @@ -953,6 +1045,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
>                                 spin_lock(&group->notification_lock);
>                                 list_add_tail(&event->fse.list,
>                                         &group->fanotify_data.access_list);
> +                               FANOTIFY_PERM(event)->pid = current->pid;
>                                 spin_unlock(&group->notification_lock);
>                         }
>                 }
> @@ -1012,6 +1105,8 @@ static int fanotify_release(struct inode *ignored, struct file *file)
>          */
>         fsnotify_group_stop_queueing(group);
>
> +       fanotify_perm_watchdog_group_remove(group);
> +
>         /*
>          * Process all permission events on access_list and notification queue
>          * and simulate reply from userspace.
> @@ -1464,6 +1559,10 @@ static int fanotify_add_mark(struct fsnotify_group *group,
>         fsnotify_group_unlock(group);
>
>         fsnotify_put_mark(fsn_mark);
> +
> +       if (!ret && (mask & FANOTIFY_PERM_EVENTS))
> +               fanotify_perm_watchdog_group_add(group);
> +

This ends up doing
       if (list_empty(&group->fanotify_data.perm_group)) {

Without holding the group lock nor the perm_group_lock.
it does not look safe against adding to fanotify_data.perm_group
twice.

It would have been more natural and balanced to add group
to watchdog list on fanotify_init().

You can do that based on (group->priority > FSNOTIFY_PRIO_NORMAL)
because while a program could create an fanotify group with
priority FAN_CLASS_CONTENT and not add permission event
watches, there is absolutely no reason to optimize for this case and
not add this group to the "permission events capable" perm_group list.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] fanotify: add watchdog for permission events
  2025-07-03 15:42 ` Amir Goldstein
@ 2025-07-03 16:22   ` Miklos Szeredi
  0 siblings, 0 replies; 6+ messages in thread
From: Miklos Szeredi @ 2025-07-03 16:22 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Miklos Szeredi, linux-fsdevel, Jan Kara, Ian Kent, Eric Sandeen

On Thu, 3 Jul 2025 at 17:47, Amir Goldstein <amir73il@gmail.com> wrote:

> Do you mean deadlock in userspace or deadlock on some kernel
> lock because the server is operating on the filesystem and the
> permission event was in a locked context?

Server is doing in a loop:

1) read perm event from from fanotify fd
2) do something to decide between allow/deny
3) write reply to fanotify

It doesn't matter where 2) gets stuck, in userspace or in the kernel,
the result is the same: a stuck open syscall.

> There is a nuance here.
> Your patch does not count the time from when the operation was queued
> and blocked.
>
> It counts the time from when the AV software *reads* the event.
> If AV software went off to take a nap and does not read events,
> you will not get a watchdog.

Right.

But server is unlikely to be doing anything between 3) and 1), so in
practice the head of the queue is unlikely to be stuck.  But maybe the
watchdog could be taught to handle that case as well (no outstanding
requests ad no forward progress in the queue).

> If it is, then I wonder if you could share some details about the
> analysis of the deadlocks.

It's very often plain recursion: handling the event triggers another
permission event,  in some roundabout way, obviously, otherwise it
would have been found in testing.

This is apparently a continuing problem for support teams,  because
when this happens the OS is the first to blame: "everything froze,
this is broke" and sometimes it's hard to convince customers and even
harder to convince AV vendors, that it's not in fact the OS.

> So are the deadlocks that you found happen on fs with atomic_open()?
> Technically, we can release the dir inode lock in finish_open()
> I think it's just a matter of code architecture to make this happen.

I don't think it's this.

> I think that's one of those features where sysctl knob is more useful to
> distros and admin than Kconfig.
> Might as well be a sysctl knob to control perm_group_timeout
> where 0 means off.

Okay.

> It is a bit odd to have a pid_t pid field here as well as
> struct pid *pid field in fae.pid (the event generating pid).
> So I think, either reuse fae.pid to keep reference to reader task_pid
> or leave this pid_t field here with a more specific name.

Yeah, I noticed the name conflict, then forgot about it.  Will fix.

> It would have been more natural and balanced to add group
> to watchdog list on fanotify_init().
>
> You can do that based on (group->priority > FSNOTIFY_PRIO_NORMAL)
> because while a program could create an fanotify group with
> priority FAN_CLASS_CONTENT and not add permission event
> watches, there is absolutely no reason to optimize for this case and
> not add this group to the "permission events capable" perm_group list.

Yeah, it's aesthetically more pleasing, but I wonder if it's worth it
having to explain why it's done this way.

A more self explanatory solution is to just move the list_empty()
inside the spinlock, and performance-wise it's not going to matter.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] fanotify: add watchdog for permission events
  2025-07-03 13:05 [RFC PATCH] fanotify: add watchdog for permission events Miklos Szeredi
  2025-07-03 15:42 ` Amir Goldstein
@ 2025-07-04  9:56 ` Jan Kara
  2025-07-04 10:22   ` Amir Goldstein
  1 sibling, 1 reply; 6+ messages in thread
From: Jan Kara @ 2025-07-04  9:56 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: linux-fsdevel, Jan Kara, Amir Goldstein, Ian Kent, Eric Sandeen

On Thu 03-07-25 15:05:37, Miklos Szeredi wrote:
> This is to make it easier to debug issues with AV software, which time and
> again deadlocks with no indication of where the issue comes from, and the
> kernel being blamed for the deadlock.  Then we need to analyze dumps to
> prove that the kernel is not in fact at fault.

I share the pain. I had to do quite some of these analyses myself :).
Luckily our support guys have trained to do the analysis themselves over
the years so it rarely reaches my table anymore.

> With this patch a warning is printed when permission event is received by
> userspace but not answered for more than 20 seconds.
> 
> The timeout is very coarse (20-40s) but I guess it's good enough for the
> purpose.

I'm not opposed to the idea (although I agree with Amir that it should be
tunable - we have /proc/sys/fs/fanotify/ for similar things). Just I'm not
sure it will have the desired deterring effect for fanotify users wanting
to blame the kernel. What usually convinces them is showing where their
tasks supposed to write reply to permission event (i.e., those that have
corresponding event fds in their fdtable) are blocked and hence they cannot
reply. But with some education I suppose it can work. After all the
messages you print contain the task responsible to answer which is already
helpful.

> +config FANOTIFY_PERM_WATCHDOG
> +       bool "fanotify permission event watchdog"
> +       depends on FANOTIFY_ACCESS_PERMISSIONS
> +       default n

As Amir wrote, I don't think we need a kconfig for this, configuration
through /proc/sys/fs/fanotify/ will be much more flexible.

> diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> index b44e70e44be6..8b60fbb9594f 100644
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -438,10 +438,14 @@ FANOTIFY_ME(struct fanotify_event *event)
>  struct fanotify_perm_event {
>  	struct fanotify_event fae;
>  	struct path path;
> -	const loff_t *ppos;		/* optional file range info */
> +	union {
> +		const loff_t *ppos;	/* optional file range info */
> +		pid_t pid;		/* pid of task processing the event */
> +	};

I think Amir complained about the generic 'pid' name already. Maybe
processing_pid? Also I'd just get rid of the union. We don't have *that*
many permission events that 4 bytes would matter and possible interactions
between pre-content events and this watchdog using the same space make me
somewhat uneasy.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] fanotify: add watchdog for permission events
  2025-07-04  9:56 ` Jan Kara
@ 2025-07-04 10:22   ` Amir Goldstein
  2025-09-09 14:34     ` Miklos Szeredi
  0 siblings, 1 reply; 6+ messages in thread
From: Amir Goldstein @ 2025-07-04 10:22 UTC (permalink / raw)
  To: Jan Kara; +Cc: Miklos Szeredi, linux-fsdevel, Ian Kent, Eric Sandeen

On Fri, Jul 4, 2025 at 11:56 AM Jan Kara <jack@suse.cz> wrote:
>
> On Thu 03-07-25 15:05:37, Miklos Szeredi wrote:
> > This is to make it easier to debug issues with AV software, which time and
> > again deadlocks with no indication of where the issue comes from, and the
> > kernel being blamed for the deadlock.  Then we need to analyze dumps to
> > prove that the kernel is not in fact at fault.
>
> I share the pain. I had to do quite some of these analyses myself :).
> Luckily our support guys have trained to do the analysis themselves over
> the years so it rarely reaches my table anymore.
>
> > With this patch a warning is printed when permission event is received by
> > userspace but not answered for more than 20 seconds.
> >
> > The timeout is very coarse (20-40s) but I guess it's good enough for the
> > purpose.
>
> I'm not opposed to the idea (although I agree with Amir that it should be
> tunable - we have /proc/sys/fs/fanotify/ for similar things). Just I'm not
> sure it will have the desired deterring effect for fanotify users wanting
> to blame the kernel. What usually convinces them is showing where their
> tasks supposed to write reply to permission event (i.e., those that have
> corresponding event fds in their fdtable) are blocked and hence they cannot
> reply. But with some education I suppose it can work. After all the
> messages you print contain the task responsible to answer which is already
> helpful.
>
> > +config FANOTIFY_PERM_WATCHDOG
> > +       bool "fanotify permission event watchdog"
> > +       depends on FANOTIFY_ACCESS_PERMISSIONS
> > +       default n
>
> As Amir wrote, I don't think we need a kconfig for this, configuration
> through /proc/sys/fs/fanotify/ will be much more flexible.
>
> > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> > index b44e70e44be6..8b60fbb9594f 100644
> > --- a/fs/notify/fanotify/fanotify.h
> > +++ b/fs/notify/fanotify/fanotify.h
> > @@ -438,10 +438,14 @@ FANOTIFY_ME(struct fanotify_event *event)
> >  struct fanotify_perm_event {
> >       struct fanotify_event fae;
> >       struct path path;
> > -     const loff_t *ppos;             /* optional file range info */
> > +     union {
> > +             const loff_t *ppos;     /* optional file range info */
> > +             pid_t pid;              /* pid of task processing the event */
> > +     };
>
> I think Amir complained about the generic 'pid' name already. Maybe
> processing_pid? Also I'd just get rid of the union. We don't have *that*
> many permission events that 4 bytes would matter and possible interactions
> between pre-content events and this watchdog using the same space make me
> somewhat uneasy.

fae.pid is not used after copy_event_to_user() so it can be reused
to hold struct pid of processing task while waiting for response.
But anyway, I think there is a hole in the struct after int fd.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] fanotify: add watchdog for permission events
  2025-07-04 10:22   ` Amir Goldstein
@ 2025-09-09 14:34     ` Miklos Szeredi
  0 siblings, 0 replies; 6+ messages in thread
From: Miklos Szeredi @ 2025-09-09 14:34 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Jan Kara, Miklos Szeredi, linux-fsdevel, Ian Kent, Eric Sandeen

On Fri, 4 Jul 2025 at 12:22, Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Fri, Jul 4, 2025 at 11:56 AM Jan Kara <jack@suse.cz> wrote:
> >
> > On Thu 03-07-25 15:05:37, Miklos Szeredi wrote:
> > > This is to make it easier to debug issues with AV software, which time and
> > > again deadlocks with no indication of where the issue comes from, and the
> > > kernel being blamed for the deadlock.  Then we need to analyze dumps to
> > > prove that the kernel is not in fact at fault.
> >
> > I share the pain. I had to do quite some of these analyses myself :).
> > Luckily our support guys have trained to do the analysis themselves over
> > the years so it rarely reaches my table anymore.
> >
> > > With this patch a warning is printed when permission event is received by
> > > userspace but not answered for more than 20 seconds.
> > >
> > > The timeout is very coarse (20-40s) but I guess it's good enough for the
> > > purpose.
> >
> > I'm not opposed to the idea (although I agree with Amir that it should be
> > tunable - we have /proc/sys/fs/fanotify/ for similar things). Just I'm not
> > sure it will have the desired deterring effect for fanotify users wanting
> > to blame the kernel. What usually convinces them is showing where their
> > tasks supposed to write reply to permission event (i.e., those that have
> > corresponding event fds in their fdtable) are blocked and hence they cannot
> > reply. But with some education I suppose it can work. After all the
> > messages you print contain the task responsible to answer which is already
> > helpful.
> >
> > > +config FANOTIFY_PERM_WATCHDOG
> > > +       bool "fanotify permission event watchdog"
> > > +       depends on FANOTIFY_ACCESS_PERMISSIONS
> > > +       default n
> >
> > As Amir wrote, I don't think we need a kconfig for this, configuration
> > through /proc/sys/fs/fanotify/ will be much more flexible.
> >
> > > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> > > index b44e70e44be6..8b60fbb9594f 100644
> > > --- a/fs/notify/fanotify/fanotify.h
> > > +++ b/fs/notify/fanotify/fanotify.h
> > > @@ -438,10 +438,14 @@ FANOTIFY_ME(struct fanotify_event *event)
> > >  struct fanotify_perm_event {
> > >       struct fanotify_event fae;
> > >       struct path path;
> > > -     const loff_t *ppos;             /* optional file range info */
> > > +     union {
> > > +             const loff_t *ppos;     /* optional file range info */
> > > +             pid_t pid;              /* pid of task processing the event */
> > > +     };
> >
> > I think Amir complained about the generic 'pid' name already. Maybe
> > processing_pid? Also I'd just get rid of the union. We don't have *that*
> > many permission events that 4 bytes would matter and possible interactions
> > between pre-content events and this watchdog using the same space make me
> > somewhat uneasy.

Amir, Jan, thanks for the reviews.

I forgot about this, but now dug it out again and hopefully addressed
all comments in v2:

  https://lore.kernel.org/linux-fsdevel/20250909143053.112171-1-mszeredi@redhat.com/

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-09-09 14:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-03 13:05 [RFC PATCH] fanotify: add watchdog for permission events Miklos Szeredi
2025-07-03 15:42 ` Amir Goldstein
2025-07-03 16:22   ` Miklos Szeredi
2025-07-04  9:56 ` Jan Kara
2025-07-04 10:22   ` Amir Goldstein
2025-09-09 14:34     ` Miklos Szeredi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).