[PATCH] fanotify: support custom default close response

linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] fanotify: support custom default close response
@ 2025-06-23 19:25 Ibrahim Jirdeh
  2025-06-24  6:30 ` Amir Goldstein
  2025-06-24 11:11 ` kernel test robot
  0 siblings, 2 replies; 24+ messages in thread
From: Ibrahim Jirdeh @ 2025-06-23 19:25 UTC (permalink / raw)
  To: ibrahimjirdeh; +Cc: jack, amir73il, josef, lesha, linux-fsdevel, sargun

Currently the default response for pending events is FAN_ALLOW.
This makes default close response configurable. The main goal
of these changes would be to provide better handling for pending
events for lazy file loading use cases which may back fanotify
events by a long-lived daemon. For earlier discussion see:
https://lore.kernel.org/linux-fsdevel/6za2mngeqslmqjg3icoubz37hbbxi6bi44canfsg2aajgkialt@c3ujlrjzkppr/
This implements the first approach outlined there of providing
configuration for response on group close. This is supported by
writing a response with
.fd = FAN_NOFD
.response = FAN_DENY | FAN_DEFAULT
which modifies the group property default_response

Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@meta.com>
---
 fs/notify/fanotify/fanotify_user.c  | 14 ++++++++++++--
 include/linux/fanotify.h            |  2 +-
 include/linux/fsnotify_backend.h    |  1 +
 include/uapi/linux/fanotify.h       |  1 +
 tools/include/uapi/linux/fanotify.h |  1 +
 5 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index b192ee068a7a..02669abff4a5 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -378,6 +378,13 @@ static int process_access_response(struct fsnotify_group *group,
 		return -EINVAL;
 	}
 
+	if (response & FAN_DEFAULT) {
+		if (fd != FAN_NOFD)
+			return -EINVAL;
+		group->default_response = response & FANOTIFY_RESPONSE_ACCESS;
+		return 0;
+	}
+
 	if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
 		return -EINVAL;
 
@@ -1023,7 +1030,8 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 		event = list_first_entry(&group->fanotify_data.access_list,
 				struct fanotify_perm_event, fae.fse.list);
 		list_del_init(&event->fae.fse.list);
-		finish_permission_event(group, event, FAN_ALLOW, NULL);
+		finish_permission_event(group, event,
+				group->default_response, NULL);
 		spin_lock(&group->notification_lock);
 	}
 
@@ -1040,7 +1048,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 			fsnotify_destroy_event(group, fsn_event);
 		} else {
 			finish_permission_event(group, FANOTIFY_PERM(event),
-						FAN_ALLOW, NULL);
+						group->default_response, NULL);
 		}
 		spin_lock(&group->notification_lock);
 	}
@@ -1640,6 +1648,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 		goto out_destroy_group;
 	}
 
+	group->default_response = FAN_ALLOW;
+
 	BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE));
 	if (flags & FAN_UNLIMITED_QUEUE) {
 		group->max_events = UINT_MAX;
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 879cff5eccd4..182fc574b848 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -134,7 +134,7 @@
 
 /* These masks check for invalid bits in permission responses. */
 #define FANOTIFY_RESPONSE_ACCESS (FAN_ALLOW | FAN_DENY)
-#define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO)
+#define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO | FAN_DEFAULT)
 #define FANOTIFY_RESPONSE_VALID_MASK \
 	(FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS | \
 	 (FAN_ERRNO_MASK << FAN_ERRNO_SHIFT))
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d4034ddaf392..9683396acda6 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -231,6 +231,7 @@ struct fsnotify_group {
 	unsigned int max_events;		/* maximum events allowed on the list */
 	enum fsnotify_group_prio priority;	/* priority for sending events */
 	bool shutdown;		/* group is being shut down, don't queue more events */
+	unsigned int default_response; /* default response sent on group close */
 
 #define FSNOTIFY_GROUP_USER	0x01 /* user allocated group */
 #define FSNOTIFY_GROUP_DUPS	0x02 /* allow multiple marks per object */
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index e710967c7c26..7badde273a66 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -254,6 +254,7 @@ struct fanotify_response_info_audit_rule {
 
 #define FAN_AUDIT	0x10	/* Bitmask to create audit record for result */
 #define FAN_INFO	0x20	/* Bitmask to indicate additional information */
+#define FAN_DEFAULT	0x30	/* Bitmask to set default response on close */
 
 /* No fd set in event */
 #define FAN_NOFD	-1
diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h
index e710967c7c26..7badde273a66 100644
--- a/tools/include/uapi/linux/fanotify.h
+++ b/tools/include/uapi/linux/fanotify.h
@@ -254,6 +254,7 @@ struct fanotify_response_info_audit_rule {
 
 #define FAN_AUDIT	0x10	/* Bitmask to create audit record for result */
 #define FAN_INFO	0x20	/* Bitmask to indicate additional information */
+#define FAN_DEFAULT	0x30	/* Bitmask to set default response on close */
 
 /* No fd set in event */
 #define FAN_NOFD	-1
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-06-23 19:25 [PATCH] fanotify: support custom default close response Ibrahim Jirdeh
@ 2025-06-24  6:30 ` Amir Goldstein
  2025-06-26 21:19   ` Ibrahim Jirdeh
  2025-06-30 15:36   ` Jan Kara
  2025-06-24 11:11 ` kernel test robot
  1 sibling, 2 replies; 24+ messages in thread
From: Amir Goldstein @ 2025-06-24  6:30 UTC (permalink / raw)
  To: Ibrahim Jirdeh; +Cc: jack, josef, lesha, linux-fsdevel, sargun

On Mon, Jun 23, 2025 at 9:26 PM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
>
> Currently the default response for pending events is FAN_ALLOW.
> This makes default close response configurable. The main goal
> of these changes would be to provide better handling for pending
> events for lazy file loading use cases which may back fanotify
> events by a long-lived daemon. For earlier discussion see:
> https://lore.kernel.org/linux-fsdevel/6za2mngeqslmqjg3icoubz37hbbxi6bi44canfsg2aajgkialt@c3ujlrjzkppr/

These lore links are typically placed at the commit message tail block
if related to a suggestion you would typically use:

Suggested-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxi6PvAcT1vL0d0e+7YjvkfU-kwFVVMAN-tc-FKXe1wtSg@mail.gmail.com/
Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@meta.com>

This way reviewers whose response is "what a terrible idea!" can
point their arrows at me instead of you ;)

Note that this is a more accurate link to the message where the default
response API was proposed, so readers won't need to sift through
this long thread to find the reference.

> This implements the first approach outlined there of providing
> configuration for response on group close. This is supported by
> writing a response with
> .fd = FAN_NOFD
> .response = FAN_DENY | FAN_DEFAULT
> which modifies the group property default_response
>
> Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@meta.com>
> ---
>  fs/notify/fanotify/fanotify_user.c  | 14 ++++++++++++--
>  include/linux/fanotify.h            |  2 +-
>  include/linux/fsnotify_backend.h    |  1 +
>  include/uapi/linux/fanotify.h       |  1 +
>  tools/include/uapi/linux/fanotify.h |  1 +
>  5 files changed, 16 insertions(+), 3 deletions(-)
>
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index b192ee068a7a..02669abff4a5 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -378,6 +378,13 @@ static int process_access_response(struct fsnotify_group *group,
>                 return -EINVAL;
>         }
>
> +       if (response & FAN_DEFAULT) {
> +               if (fd != FAN_NOFD)
> +                       return -EINVAL;

I think we also need to check that no bits other than the allowed bits
for default response
are set, for example, if user attempts to do:
 .response = FAN_DENY | FAN_AUDIT | FAN_DEFAULT

But that opens up the question, do we want to also allow custom
error in default response, e.g.:
 .response = FAN_DENY_ERRNO(EAGAIN) | FAN_DEFAULT

Anyway, we do not have to implement custom default error from the
start. It will complicate the implementation a bit, but as long as you deny
setting the default response with unsupported flags, we can extend it later.

> +               group->default_response = response & FANOTIFY_RESPONSE_ACCESS;
> +               return 0;
> +       }
> +
>         if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
>                 return -EINVAL;
>
> @@ -1023,7 +1030,8 @@ static int fanotify_release(struct inode *ignored, struct file *file)
>                 event = list_first_entry(&group->fanotify_data.access_list,
>                                 struct fanotify_perm_event, fae.fse.list);
>                 list_del_init(&event->fae.fse.list);
> -               finish_permission_event(group, event, FAN_ALLOW, NULL);
> +               finish_permission_event(group, event,
> +                               group->default_response, NULL);
>                 spin_lock(&group->notification_lock);
>         }
>
> @@ -1040,7 +1048,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
>                         fsnotify_destroy_event(group, fsn_event);
>                 } else {
>                         finish_permission_event(group, FANOTIFY_PERM(event),
> -                                               FAN_ALLOW, NULL);
> +                                               group->default_response, NULL);
>                 }
>                 spin_lock(&group->notification_lock);
>         }
> @@ -1640,6 +1648,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
>                 goto out_destroy_group;
>         }
>
> +       group->default_response = FAN_ALLOW;
> +
>         BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE));
>         if (flags & FAN_UNLIMITED_QUEUE) {
>                 group->max_events = UINT_MAX;
> diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
> index 879cff5eccd4..182fc574b848 100644
> --- a/include/linux/fanotify.h
> +++ b/include/linux/fanotify.h
> @@ -134,7 +134,7 @@
>
>  /* These masks check for invalid bits in permission responses. */
>  #define FANOTIFY_RESPONSE_ACCESS (FAN_ALLOW | FAN_DENY)
> -#define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO)
> +#define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO | FAN_DEFAULT)
>  #define FANOTIFY_RESPONSE_VALID_MASK \
>         (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS | \
>          (FAN_ERRNO_MASK << FAN_ERRNO_SHIFT))
> diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
> index d4034ddaf392..9683396acda6 100644
> --- a/include/linux/fsnotify_backend.h
> +++ b/include/linux/fsnotify_backend.h
> @@ -231,6 +231,7 @@ struct fsnotify_group {
>         unsigned int max_events;                /* maximum events allowed on the list */
>         enum fsnotify_group_prio priority;      /* priority for sending events */
>         bool shutdown;          /* group is being shut down, don't queue more events */
> +       unsigned int default_response; /* default response sent on group close */
>
>  #define FSNOTIFY_GROUP_USER    0x01 /* user allocated group */
>  #define FSNOTIFY_GROUP_DUPS    0x02 /* allow multiple marks per object */
> diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
> index e710967c7c26..7badde273a66 100644
> --- a/include/uapi/linux/fanotify.h
> +++ b/include/uapi/linux/fanotify.h
> @@ -254,6 +254,7 @@ struct fanotify_response_info_audit_rule {
>
>  #define FAN_AUDIT      0x10    /* Bitmask to create audit record for result */
>  #define FAN_INFO       0x20    /* Bitmask to indicate additional information */
> +#define FAN_DEFAULT    0x30    /* Bitmask to set default response on close */
>
>  /* No fd set in event */
>  #define FAN_NOFD       -1
> diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h
> index e710967c7c26..7badde273a66 100644
> --- a/tools/include/uapi/linux/fanotify.h
> +++ b/tools/include/uapi/linux/fanotify.h
> @@ -254,6 +254,7 @@ struct fanotify_response_info_audit_rule {
>
>  #define FAN_AUDIT      0x10    /* Bitmask to create audit record for result */
>  #define FAN_INFO       0x20    /* Bitmask to indicate additional information */
> +#define FAN_DEFAULT    0x30    /* Bitmask to set default response on close */
>
>  /* No fd set in event */
>  #define FAN_NOFD       -1
> --
> 2.47.1
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-06-23 19:25 [PATCH] fanotify: support custom default close response Ibrahim Jirdeh
  2025-06-24  6:30 ` Amir Goldstein
@ 2025-06-24 11:11 ` kernel test robot
  1 sibling, 0 replies; 24+ messages in thread
From: kernel test robot @ 2025-06-24 11:11 UTC (permalink / raw)
  To: Ibrahim Jirdeh
  Cc: oe-kbuild-all, jack, amir73il, josef, lesha, linux-fsdevel,
	sargun

Hi Ibrahim,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.16-rc3 next-20250623]
[cannot apply to jack-fs/fsnotify]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Ibrahim-Jirdeh/fanotify-support-custom-default-close-response/20250624-032902
base:   linus/master
patch link:    https://lore.kernel.org/r/20250623192503.2673076-1-ibrahimjirdeh%40meta.com
patch subject: [PATCH] fanotify: support custom default close response
config: arm-randconfig-002-20250624 (https://download.01.org/0day-ci/archive/20250624/202506241819.lF9Wd4Tn-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 8.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250624/202506241819.lF9Wd4Tn-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202506241819.lF9Wd4Tn-lkp@intel.com/

All warnings (new ones prefixed by >>):

   fs/notify/fanotify/fanotify_user.c: In function 'finish_permission_event.constprop':
>> fs/notify/fanotify/fanotify_user.c:316:3: warning: argument 2 null where non-null expected [-Wnonnull]
      memcpy(&event->audit_rule, friar, sizeof(*friar));
      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   In file included from include/linux/string.h:65,
                    from include/linux/bitmap.h:13,
                    from include/linux/cpumask.h:12,
                    from include/linux/smp.h:13,
                    from include/linux/lockdep.h:14,
                    from include/linux/rcupdate.h:29,
                    from include/linux/sysctl.h:26,
                    from include/linux/fanotify.h:5,
                    from fs/notify/fanotify/fanotify_user.c:2:
   arch/arm/include/asm/string.h:20:15: note: in a call to function 'memcpy' declared here
    extern void * memcpy(void *, const void *, __kernel_size_t);
                  ^~~~~~


vim +316 fs/notify/fanotify/fanotify_user.c

70529a199574c1 Richard Guy Briggs 2023-02-03  301  
40873284d7106f Jan Kara           2019-01-08  302  /*
40873284d7106f Jan Kara           2019-01-08  303   * Finish processing of permission event by setting it to ANSWERED state and
40873284d7106f Jan Kara           2019-01-08  304   * drop group->notification_lock.
40873284d7106f Jan Kara           2019-01-08  305   */
40873284d7106f Jan Kara           2019-01-08  306  static void finish_permission_event(struct fsnotify_group *group,
70529a199574c1 Richard Guy Briggs 2023-02-03  307  				    struct fanotify_perm_event *event, u32 response,
70529a199574c1 Richard Guy Briggs 2023-02-03  308  				    struct fanotify_response_info_audit_rule *friar)
40873284d7106f Jan Kara           2019-01-08  309  				    __releases(&group->notification_lock)
40873284d7106f Jan Kara           2019-01-08  310  {
fabf7f29b3e2ce Jan Kara           2019-01-08  311  	bool destroy = false;
fabf7f29b3e2ce Jan Kara           2019-01-08  312  
40873284d7106f Jan Kara           2019-01-08  313  	assert_spin_locked(&group->notification_lock);
70529a199574c1 Richard Guy Briggs 2023-02-03  314  	event->response = response & ~FAN_INFO;
70529a199574c1 Richard Guy Briggs 2023-02-03  315  	if (response & FAN_INFO)
70529a199574c1 Richard Guy Briggs 2023-02-03 @316  		memcpy(&event->audit_rule, friar, sizeof(*friar));
70529a199574c1 Richard Guy Briggs 2023-02-03  317  
fabf7f29b3e2ce Jan Kara           2019-01-08  318  	if (event->state == FAN_EVENT_CANCELED)
fabf7f29b3e2ce Jan Kara           2019-01-08  319  		destroy = true;
fabf7f29b3e2ce Jan Kara           2019-01-08  320  	else
40873284d7106f Jan Kara           2019-01-08  321  		event->state = FAN_EVENT_ANSWERED;
40873284d7106f Jan Kara           2019-01-08  322  	spin_unlock(&group->notification_lock);
fabf7f29b3e2ce Jan Kara           2019-01-08  323  	if (destroy)
fabf7f29b3e2ce Jan Kara           2019-01-08  324  		fsnotify_destroy_event(group, &event->fae.fse);
40873284d7106f Jan Kara           2019-01-08  325  }
40873284d7106f Jan Kara           2019-01-08  326  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] fanotify: support custom default close response
  2025-06-24  6:30 ` Amir Goldstein
@ 2025-06-26 21:19   ` Ibrahim Jirdeh
  2025-06-30 15:36   ` Jan Kara
  1 sibling, 0 replies; 24+ messages in thread
From: Ibrahim Jirdeh @ 2025-06-26 21:19 UTC (permalink / raw)
  To: amir73il; +Cc: ibrahimjirdeh, jack, josef, lesha, linux-fsdevel, sargun

> On 6/23/25, 11:32 PM, "Amir Goldstein" <amir73il@gmail.com <mailto:amir73il@gmail.com>> wrote:
> > On Mon, Jun 23, 2025 at 9:26 PM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> >
> > Currently the default response for pending events is FAN\_ALLOW.
> > This makes default close response configurable. The main goal
> > of these changes would be to provide better handling for pending
> > events for lazy file loading use cases which may back fanotify
> > events by a long-lived daemon. For earlier discussion see:
> > [https://lore.kernel.org/linux-fsdevel/6za2mngeqslmqjg3icoubz37hbbxi6bi44canfsg2aajgkialt@c3ujlrjzkppr/](https://lore.kernel.org/linux-fsdevel/6za2mngeqslmqjg3icoubz37hbbxi6bi44canfsg2aajgkialt@c3ujlrjzkppr/)
>
> These lore links are typically placed at the commit message tail block
> if related to a suggestion you would typically use:
>
> Suggested-by: Amir Goldstein <amir73il@gmail.com>
> Link: [https://lore.kernel.org/linux-fsdevel/CAOQ4uxi6PvAcT1vL0d0e+7YjvkfU-kwFVVMAN-tc-FKXe1wtSg@mail.gmail.com/](https://lore.kernel.org/linux-fsdevel/CAOQ4uxi6PvAcT1vL0d0e+7YjvkfU-kwFVVMAN-tc-FKXe1wtSg@mail.gmail.com/)
> Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@meta.com>
>
> This way reviewers whose response is "what a terrible idea!" can
> point their arrows at me instead of you ;)
>
> Note that this is a more accurate link to the message where the default
> response API was proposed, so readers won't need to sift through
> this long thread to find the reference.
>
> >
> > This implements the first approach outlined there of providing
> > configuration for response on group close. This is supported by
> > writing a response with
> > .fd = FAN\_NOFD
> > .response = FAN\_DENY | FAN\_DEFAULT
> > which modifies the group property default\_response
> >
> > Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@meta.com>
> > ---
> >  fs/notify/fanotify/fanotify\_user.c  | 14 ++++++++++++--
> >  include/linux/fanotify.h            |  2 +-
> >  include/linux/fsnotify\_backend.h    |  1 +
> >  include/uapi/linux/fanotify.h       |  1 +
> >  tools/include/uapi/linux/fanotify.h |  1 +
> >  5 files changed, 16 insertions(+), 3 deletions(-)
> >
> > diff --git a/fs/notify/fanotify/fanotify\_user.c b/fs/notify/fanotify/fanotify\_user.c
> > index b192ee068a7a..02669abff4a5 100644
> > --- a/fs/notify/fanotify/fanotify\_user.c
> > +++ b/fs/notify/fanotify/fanotify\_user.c
> > @@ -378,6 +378,13 @@ static int process\_access\_response(struct fsnotify\_group \*group,
> >                 return -EINVAL;
> >         }
> >
> > +       if (response & FAN\_DEFAULT) {
> > +               if (fd != FAN\_NOFD)
> > +                       return -EINVAL;
> >
> I think we also need to check that no bits other than the allowed bits
> for default response
> are set, for example, if user attempts to do:
>  .response = FAN\_DENY | FAN\_AUDIT | FAN\_DEFAULT
>
> But that opens up the question, do we want to also allow custom
> error in default response, e.g.:
>  .response = FAN\_DENY\_ERRNO(EAGAIN) | FAN\_DEFAULT
>
> Anyway, we do not have to implement custom default error from the
> start. It will complicate the implementation a bit, but as long as you deny
> setting the default response with unsupported flags, we can extend it later.

Sure I can update this to disallow unexpected bits when updating default response.
It does make sense to me to also support custom error in default response, I can
help with exending the feature either as a later part of this series, or as
future follow up. Also will update the links and commit tail block as you've
suggested.

> >
> > +               group->default\_response = response & FANOTIFY\_RESPONSE\_ACCESS;
> > +               return 0;
> > +       }
> > +
> >         if ((response & FAN\_AUDIT) && !FAN\_GROUP\_FLAG(group, FAN\_ENABLE\_AUDIT))
> >                 return -EINVAL;
> >
> > @@ -1023,7 +1030,8 @@ static int fanotify\_release(struct inode \*ignored, struct file \*file)
> >                 event = list\_first\_entry(&group->fanotify\_data.access\_list,
> >                                 struct fanotify\_perm\_event, fae.fse.list);
> >                 list\_del\_init(&event->fae.fse.list);
> > -               finish\_permission\_event(group, event, FAN\_ALLOW, NULL);
> > +               finish\_permission\_event(group, event,
> > +                               group->default\_response, NULL);
> >                 spin\_lock(&group->notification\_lock);
> >         }
> >
> > @@ -1040,7 +1048,7 @@ static int fanotify\_release(struct inode \*ignored, struct file \*file)
> >                         fsnotify\_destroy\_event(group, fsn\_event);
> >                 } else {
> >                         finish\_permission\_event(group, FANOTIFY\_PERM(event),
> > -                                               FAN\_ALLOW, NULL);
> > +                                               group->default\_response, NULL);
> >                 }
> >                 spin\_lock(&group->notification\_lock);
> >         }
> > @@ -1640,6 +1648,8 @@ SYSCALL\_DEFINE2(fanotify\_init, unsigned int, flags, unsigned int, event\_f\_flags)
> >                 goto out\_destroy\_group;
> >         }
> >
> > +       group->default\_response = FAN\_ALLOW;
> > +
> >         BUILD\_BUG\_ON(!(FANOTIFY\_ADMIN\_INIT\_FLAGS & FAN\_UNLIMITED\_QUEUE));
> >         if (flags & FAN\_UNLIMITED\_QUEUE) {
> >                 group->max\_events = UINT\_MAX;
> > diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
> > index 879cff5eccd4..182fc574b848 100644
> > --- a/include/linux/fanotify.h
> > +++ b/include/linux/fanotify.h
> > @@ -134,7 +134,7 @@
> >
> >  /\* These masks check for invalid bits in permission responses. \*/
> >  #define FANOTIFY\_RESPONSE\_ACCESS (FAN\_ALLOW | FAN\_DENY)
> > -#define FANOTIFY\_RESPONSE\_FLAGS (FAN\_AUDIT | FAN\_INFO)
> > +#define FANOTIFY\_RESPONSE\_FLAGS (FAN\_AUDIT | FAN\_INFO | FAN\_DEFAULT)
> >  #define FANOTIFY\_RESPONSE\_VALID\_MASK \\
> >         (FANOTIFY\_RESPONSE\_ACCESS | FANOTIFY\_RESPONSE\_FLAGS | \\
> >          (FAN\_ERRNO\_MASK << FAN\_ERRNO\_SHIFT))
> > diff --git a/include/linux/fsnotify\_backend.h b/include/linux/fsnotify\_backend.h
> > index d4034ddaf392..9683396acda6 100644
> > --- a/include/linux/fsnotify\_backend.h
> > +++ b/include/linux/fsnotify\_backend.h
> > @@ -231,6 +231,7 @@ struct fsnotify\_group {
> >         unsigned int max\_events;                /\* maximum events allowed on the list \*/
> >         enum fsnotify\_group\_prio priority;      /\* priority for sending events \*/
> >         bool shutdown;          /\* group is being shut down, don't queue more events \*/
> > +       unsigned int default\_response; /\* default response sent on group close \*/
> >
> >  #define FSNOTIFY\_GROUP\_USER    0x01 /\* user allocated group \*/
> >  #define FSNOTIFY\_GROUP\_DUPS    0x02 /\* allow multiple marks per object \*/
> > diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
> > index e710967c7c26..7badde273a66 100644
> > --- a/include/uapi/linux/fanotify.h
> > +++ b/include/uapi/linux/fanotify.h
> > @@ -254,6 +254,7 @@ struct fanotify\_response\_info\_audit\_rule {
> >
> >  #define FAN\_AUDIT      0x10    /\* Bitmask to create audit record for result \*/
> >  #define FAN\_INFO       0x20    /\* Bitmask to indicate additional information \*/
> > +#define FAN\_DEFAULT    0x30    /\* Bitmask to set default response on close \*/
> >
> >  /\* No fd set in event \*/
> >  #define FAN\_NOFD       -1
> > diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h
> > index e710967c7c26..7badde273a66 100644
> > --- a/tools/include/uapi/linux/fanotify.h
> > +++ b/tools/include/uapi/linux/fanotify.h
> > @@ -254,6 +254,7 @@ struct fanotify\_response\_info\_audit\_rule {
> >
> >  #define FAN\_AUDIT      0x10    /\* Bitmask to create audit record for result \*/
> >  #define FAN\_INFO       0x20    /\* Bitmask to indicate additional information \*/
> > +#define FAN\_DEFAULT    0x30    /\* Bitmask to set default response on close \*/
> >
> >  /\* No fd set in event \*/
> >  #define FAN\_NOFD       -1
> > --
> > 2.47.1

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-06-24  6:30 ` Amir Goldstein
  2025-06-26 21:19   ` Ibrahim Jirdeh
@ 2025-06-30 15:36   ` Jan Kara
  2025-06-30 15:56     ` Amir Goldstein
  1 sibling, 1 reply; 24+ messages in thread
From: Jan Kara @ 2025-06-30 15:36 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Ibrahim Jirdeh, jack, josef, lesha, linux-fsdevel, sargun

On Tue 24-06-25 08:30:03, Amir Goldstein wrote:
> On Mon, Jun 23, 2025 at 9:26 PM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> >
> > Currently the default response for pending events is FAN_ALLOW.
> > This makes default close response configurable. The main goal
> > of these changes would be to provide better handling for pending
> > events for lazy file loading use cases which may back fanotify
> > events by a long-lived daemon. For earlier discussion see:
> > https://lore.kernel.org/linux-fsdevel/6za2mngeqslmqjg3icoubz37hbbxi6bi44canfsg2aajgkialt@c3ujlrjzkppr/
> 
> These lore links are typically placed at the commit message tail block
> if related to a suggestion you would typically use:
> 
> Suggested-by: Amir Goldstein <amir73il@gmail.com>
> Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxi6PvAcT1vL0d0e+7YjvkfU-kwFVVMAN-tc-FKXe1wtSg@mail.gmail.com/
> Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@meta.com>
> 
> This way reviewers whose response is "what a terrible idea!" can
> point their arrows at me instead of you ;)
> 
> Note that this is a more accurate link to the message where the default
> response API was proposed, so readers won't need to sift through
> this long thread to find the reference.

I've reread that thread to remember how this is supposed to be used. After
thinking about it now maybe we could just modify how pending fanotify
events behave in case of group destruction? Instead of setting FAN_ALLOW in
fanotify_release() we'd set a special event state that will make fanotify
group iteration code bubble up back to fsnotify() and restart the event
generation loop there?

In the usual case this would behave the same way as setting FAN_ALLOW (just
in case of multiple permission event watchers some will get the event two
times which shouldn't matter). In case of careful design with fd store
etc., the daemon can setup the new notification group as needed and then
close the fd from the old notification group at which point it would
receive all the pending events in the new group. I can even see us adding
ioctl to the fanotify group which when called will result in the same
behavior (i.e., all pending permission events will get the "retry"
response). That way the new daemon could just take the old fd from the fd
store and call this ioctl to receive all the pending events again.

No need for the new default response. We probably need to provide a group
feature flag for this so that userspace can safely query kernel support for
this behavior but otherwise even that wouldn't be really needed.

What do you guys think?

								Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-06-30 15:36   ` Jan Kara
@ 2025-06-30 15:56     ` Amir Goldstein
  2025-07-02 16:15       ` Jan Kara
  0 siblings, 1 reply; 24+ messages in thread
From: Amir Goldstein @ 2025-06-30 15:56 UTC (permalink / raw)
  To: Jan Kara; +Cc: Ibrahim Jirdeh, josef, lesha, linux-fsdevel, sargun

On Mon, Jun 30, 2025 at 5:36 PM Jan Kara <jack@suse.cz> wrote:
>
> On Tue 24-06-25 08:30:03, Amir Goldstein wrote:
> > On Mon, Jun 23, 2025 at 9:26 PM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> > >
> > > Currently the default response for pending events is FAN_ALLOW.
> > > This makes default close response configurable. The main goal
> > > of these changes would be to provide better handling for pending
> > > events for lazy file loading use cases which may back fanotify
> > > events by a long-lived daemon. For earlier discussion see:
> > > https://lore.kernel.org/linux-fsdevel/6za2mngeqslmqjg3icoubz37hbbxi6bi44canfsg2aajgkialt@c3ujlrjzkppr/
> >
> > These lore links are typically placed at the commit message tail block
> > if related to a suggestion you would typically use:
> >
> > Suggested-by: Amir Goldstein <amir73il@gmail.com>
> > Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxi6PvAcT1vL0d0e+7YjvkfU-kwFVVMAN-tc-FKXe1wtSg@mail.gmail.com/
> > Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@meta.com>
> >
> > This way reviewers whose response is "what a terrible idea!" can
> > point their arrows at me instead of you ;)
> >
> > Note that this is a more accurate link to the message where the default
> > response API was proposed, so readers won't need to sift through
> > this long thread to find the reference.
>
> I've reread that thread to remember how this is supposed to be used. After
> thinking about it now maybe we could just modify how pending fanotify
> events behave in case of group destruction? Instead of setting FAN_ALLOW in
> fanotify_release() we'd set a special event state that will make fanotify
> group iteration code bubble up back to fsnotify() and restart the event
> generation loop there?
>
> In the usual case this would behave the same way as setting FAN_ALLOW (just
> in case of multiple permission event watchers some will get the event two
> times which shouldn't matter). In case of careful design with fd store
> etc., the daemon can setup the new notification group as needed and then
> close the fd from the old notification group at which point it would
> receive all the pending events in the new group. I can even see us adding
> ioctl to the fanotify group which when called will result in the same
> behavior (i.e., all pending permission events will get the "retry"
> response). That way the new daemon could just take the old fd from the fd
> store and call this ioctl to receive all the pending events again.
>
> No need for the new default response. We probably need to provide a group
> feature flag for this so that userspace can safely query kernel support for
> this behavior but otherwise even that wouldn't be really needed.
>
> What do you guys think?

With proper handover I am not sure why this is needed, because:
- new group gets fd from store and signals old group
- old group does not take any new event, completes in-flight events,
  signals back new group and exists
- new group starts processing events
- so why do we need a complex mechanism in kernel to do what can easily
  be done in usersapce?

Also, regardless I think that we need the default response, because:
- groups starts, set default response to DENY and handsover fd to store
- if group crashes unexpectedly, access to all files is denied, which is
  exactly what we needed to do with the "moderated" mount
- it is similar to access to FUSE mount when server is down

For a requirement that non-populated content MUST NOT be
accessed, the default response is a very easy way to achieve it.

Thanks,
Amir.





>
>                                                                 Honza
>
> --
> Jan Kara <jack@suse.com>
> SUSE Labs, CR

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-06-30 15:56     ` Amir Goldstein
@ 2025-07-02 16:15       ` Jan Kara
  2025-07-02 16:56         ` Amir Goldstein
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kara @ 2025-07-02 16:15 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Jan Kara, Ibrahim Jirdeh, josef, lesha, linux-fsdevel, sargun

[-- Attachment #1: Type: text/plain, Size: 6405 bytes --]

On Mon 30-06-25 17:56:00, Amir Goldstein wrote:
> On Mon, Jun 30, 2025 at 5:36 PM Jan Kara <jack@suse.cz> wrote:
> > On Tue 24-06-25 08:30:03, Amir Goldstein wrote:
> > > On Mon, Jun 23, 2025 at 9:26 PM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> > > >
> > > > Currently the default response for pending events is FAN_ALLOW.
> > > > This makes default close response configurable. The main goal
> > > > of these changes would be to provide better handling for pending
> > > > events for lazy file loading use cases which may back fanotify
> > > > events by a long-lived daemon. For earlier discussion see:
> > > > https://lore.kernel.org/linux-fsdevel/6za2mngeqslmqjg3icoubz37hbbxi6bi44canfsg2aajgkialt@c3ujlrjzkppr/
> > >
> > > These lore links are typically placed at the commit message tail block
> > > if related to a suggestion you would typically use:
> > >
> > > Suggested-by: Amir Goldstein <amir73il@gmail.com>
> > > Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxi6PvAcT1vL0d0e+7YjvkfU-kwFVVMAN-tc-FKXe1wtSg@mail.gmail.com/
> > > Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@meta.com>
> > >
> > > This way reviewers whose response is "what a terrible idea!" can
> > > point their arrows at me instead of you ;)
> > >
> > > Note that this is a more accurate link to the message where the default
> > > response API was proposed, so readers won't need to sift through
> > > this long thread to find the reference.
> >
> > I've reread that thread to remember how this is supposed to be used. After
> > thinking about it now maybe we could just modify how pending fanotify
> > events behave in case of group destruction? Instead of setting FAN_ALLOW in
> > fanotify_release() we'd set a special event state that will make fanotify
> > group iteration code bubble up back to fsnotify() and restart the event
> > generation loop there?
> >
> > In the usual case this would behave the same way as setting FAN_ALLOW (just
> > in case of multiple permission event watchers some will get the event two
> > times which shouldn't matter). In case of careful design with fd store
> > etc., the daemon can setup the new notification group as needed and then
> > close the fd from the old notification group at which point it would
> > receive all the pending events in the new group. I can even see us adding
> > ioctl to the fanotify group which when called will result in the same
> > behavior (i.e., all pending permission events will get the "retry"
> > response). That way the new daemon could just take the old fd from the fd
> > store and call this ioctl to receive all the pending events again.
> >
> > No need for the new default response. We probably need to provide a group
> > feature flag for this so that userspace can safely query kernel support for
> > this behavior but otherwise even that wouldn't be really needed.
> >
> > What do you guys think?
> 
> With proper handover I am not sure why this is needed, because:
> - new group gets fd from store and signals old group
> - old group does not take any new event, completes in-flight events,
>   signals back new group and exists
> - new group starts processing events
> - so why do we need a complex mechanism in kernel to do what can easily
>   be done in usersapce?

This works for clean handover (say service update) - no need for any
mechanism (neither retry nor default response there). We are in agreement
here. If retry is supported, it will make the handover somewhat simpler for
userspace but that's not really substantial.

> Also, regardless I think that we need the default response, because:
> - groups starts, set default response to DENY and handsover fd to store
> - if group crashes unexpectedly, access to all files is denied, which is
>   exactly what we needed to do with the "moderated" mount
> - it is similar to access to FUSE mount when server is down

Yes, crashes are what I had in mind. With crashes you have nobody to
cleanly handle events still pending for the old group and you have to solve
it. Reporting FAN_DENY (through default response) is one way, what I
suggest with retry has the advantage that userspace doesn't have to deal
with spurious FAN_DENY errors in case of daemon crash. It is not a huge
benefit (crashes should better be rare ;)) but it is IMO a benefit.

Now regarding your comment about moderated mount: You are somewhat terse on
details so let me try to fill in. First let's differentiate between service
(daemon) and the notification group because they may have a different
lifetime. So the service starts, creates a notification group, places mark
on the sb with pre-content events. You didn't mention a mark in your
description but until the mark is set, the group receives no events so
there's nothing to respond to. Now if the service crashes there are two
options:

1) You didn't save your group fd anywhere. In that case the group and mark
is gone with the crash of the service, all accesses happening after this
moment are allowed => not good. Whether we have default response or not
doesn't really matter in this case for those few events that were possibly
pending. Agreed so far?

2) You've saved group fd to fd store. In this case the group is (from
kernel POV) fully alive even after the service crash and the default
response does not activate. The events will be queued to the group and
waiting for reply. No access to the fs is allowed to happen which is good.
Eventually the new service starts and we are in the situation I describe 3
paragraphs above about handling pending events.

So if we'd implement resending of pending events after group closure, I
don't see how default response (at least in its current form) would be
useful for anything.

Why I like the proposal of resending pending events:
a) No spurious FAN_DENY errors in case of service crash
b) No need for new concept (and API) for default response, just a feature
   flag.
c) With additional ioctl to trigger resending pending events without group
   closure, the newly started service can simply reuse the
   same notification group (even in case of old service crash) thus
   inheriting all placed marks (which is something Ibrahim would like to
   have).

Regarding complexity of the approach I propose the attached (untested)
patch should implement it and I don't think it is very complex logic...
So what do you think now? :)

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

[-- Attachment #2: 0001-fanotify-Add-support-for-resending-unanswered-permis.patch --]
[-- Type: text/x-patch, Size: 5323 bytes --]

From 5cc27dfbe7be415299d21ac56d38f55014a36a1f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 2 Jul 2025 18:06:13 +0200
Subject: [PATCH] fanotify: Add support for resending unanswered permission
 events

For handling of a crash of HSM service daemon, we need to somehow handle
permission events which were already reported but not yet answered. We
cannot just allow them as that will let the application access
unpopulated content. Add support for resending these events on group
shutdown. The intended use is that the HSM service will store fd
pointing to its notification group info fd store so the notification
group survives service crash. The newly started service can setup
necessary watches and then destroy the old notification group at which
point it will receive all the events pending against the old group.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c      |  8 +++++++-
 fs/notify/fanotify/fanotify.h      |  1 +
 fs/notify/fanotify/fanotify_user.c | 13 ++++++++++---
 fs/notify/fsnotify.c               |  7 +++++++
 include/uapi/linux/fanotify.h      |  1 +
 5 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 3083643b864b..3e2a09946603 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -230,7 +230,8 @@ static int fanotify_get_response(struct fsnotify_group *group,
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
 	ret = wait_event_state(group->fanotify_data.access_waitq,
-				  event->state == FAN_EVENT_ANSWERED,
+				  event->state == FAN_EVENT_ANSWERED ||
+				    event->state == FAN_EVENT_RETRY,
 				  (TASK_KILLABLE|TASK_FREEZABLE));
 
 	/* Signal pending? */
@@ -258,6 +259,11 @@ static int fanotify_get_response(struct fsnotify_group *group,
 		goto out;
 	}
 
+	if (event->state == FAN_EVENT_RETRY) {
+		ret = -ERESTART;
+		goto out;
+	}
+
 	/* userspace responded, convert to something usable */
 	switch (event->response & FANOTIFY_RESPONSE_ACCESS) {
 	case FAN_ALLOW:
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index b78308975082..ff96a5feae92 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -17,6 +17,7 @@ enum {
 	FAN_EVENT_REPORTED,
 	FAN_EVENT_ANSWERED,
 	FAN_EVENT_CANCELED,
+	FAN_EVENT_RETRY,
 };
 
 /*
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index b192ee068a7a..40922c4c7049 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -318,7 +318,10 @@ static void finish_permission_event(struct fsnotify_group *group,
 	if (event->state == FAN_EVENT_CANCELED)
 		destroy = true;
 	else
-		event->state = FAN_EVENT_ANSWERED;
+		if (response)
+			event->state = FAN_EVENT_ANSWERED;
+		else
+			event->state = FAN_EVENT_RETRY;
 	spin_unlock(&group->notification_lock);
 	if (destroy)
 		fsnotify_destroy_event(group, &event->fae.fse);
@@ -1004,6 +1007,10 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
 	struct fsnotify_event *fsn_event;
+	u32 default_response = FAN_ALLOW;
+
+	if (FAN_GROUP_FLAG(group, FAN_RETRY_UNANSWERED))
+		default_response = 0;
 
 	/*
 	 * Stop new events from arriving in the notification queue. since
@@ -1023,7 +1030,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 		event = list_first_entry(&group->fanotify_data.access_list,
 				struct fanotify_perm_event, fae.fse.list);
 		list_del_init(&event->fae.fse.list);
-		finish_permission_event(group, event, FAN_ALLOW, NULL);
+		finish_permission_event(group, event, default_response, NULL);
 		spin_lock(&group->notification_lock);
 	}
 
@@ -1040,7 +1047,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 			fsnotify_destroy_event(group, fsn_event);
 		} else {
 			finish_permission_event(group, FANOTIFY_PERM(event),
-						FAN_ALLOW, NULL);
+						default_response, NULL);
 		}
 		spin_lock(&group->notification_lock);
 	}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index e2b4f17a48bb..b0eb86124e32 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -588,6 +588,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	    (!mnt_data || !mnt_data->ns->n_fsnotify_marks))
 		return 0;
 
+resend:
 	if (sb)
 		marks_mask |= READ_ONCE(sb->s_fsnotify_mask);
 	if (mnt)
@@ -649,6 +650,12 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	ret = 0;
 out:
 	srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx);
+	/*
+	 * Resend permission event in case some group got shutdown before it
+	 * could answer
+	 */
+	if (ret == -ERESTART)
+		goto resend;
 
 	return ret;
 }
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index e710967c7c26..4eb2313dbcf0 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -57,6 +57,7 @@
 #define FAN_UNLIMITED_QUEUE	0x00000010
 #define FAN_UNLIMITED_MARKS	0x00000020
 #define FAN_ENABLE_AUDIT	0x00000040
+#define FAN_RETRY_UNANSWERED	0x00008000
 
 /* Flags to determine fanotify event format */
 #define FAN_REPORT_PIDFD	0x00000080	/* Report pidfd for event->pid */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-02 16:15       ` Jan Kara
@ 2025-07-02 16:56         ` Amir Goldstein
  2025-07-03  7:08           ` Ibrahim Jirdeh
  0 siblings, 1 reply; 24+ messages in thread
From: Amir Goldstein @ 2025-07-02 16:56 UTC (permalink / raw)
  To: Jan Kara; +Cc: Ibrahim Jirdeh, josef, lesha, linux-fsdevel, sargun

On Wed, Jul 2, 2025 at 6:15 PM Jan Kara <jack@suse.cz> wrote:
>
> On Mon 30-06-25 17:56:00, Amir Goldstein wrote:
> > On Mon, Jun 30, 2025 at 5:36 PM Jan Kara <jack@suse.cz> wrote:
> > > On Tue 24-06-25 08:30:03, Amir Goldstein wrote:
> > > > On Mon, Jun 23, 2025 at 9:26 PM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> > > > >
> > > > > Currently the default response for pending events is FAN_ALLOW.
> > > > > This makes default close response configurable. The main goal
> > > > > of these changes would be to provide better handling for pending
> > > > > events for lazy file loading use cases which may back fanotify
> > > > > events by a long-lived daemon. For earlier discussion see:
> > > > > https://lore.kernel.org/linux-fsdevel/6za2mngeqslmqjg3icoubz37hbbxi6bi44canfsg2aajgkialt@c3ujlrjzkppr/
> > > >
> > > > These lore links are typically placed at the commit message tail block
> > > > if related to a suggestion you would typically use:
> > > >
> > > > Suggested-by: Amir Goldstein <amir73il@gmail.com>
> > > > Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxi6PvAcT1vL0d0e+7YjvkfU-kwFVVMAN-tc-FKXe1wtSg@mail.gmail.com/
> > > > Signed-off-by: Ibrahim Jirdeh <ibrahimjirdeh@meta.com>
> > > >
> > > > This way reviewers whose response is "what a terrible idea!" can
> > > > point their arrows at me instead of you ;)
> > > >
> > > > Note that this is a more accurate link to the message where the default
> > > > response API was proposed, so readers won't need to sift through
> > > > this long thread to find the reference.
> > >
> > > I've reread that thread to remember how this is supposed to be used. After
> > > thinking about it now maybe we could just modify how pending fanotify
> > > events behave in case of group destruction? Instead of setting FAN_ALLOW in
> > > fanotify_release() we'd set a special event state that will make fanotify
> > > group iteration code bubble up back to fsnotify() and restart the event
> > > generation loop there?
> > >
> > > In the usual case this would behave the same way as setting FAN_ALLOW (just
> > > in case of multiple permission event watchers some will get the event two
> > > times which shouldn't matter). In case of careful design with fd store
> > > etc., the daemon can setup the new notification group as needed and then
> > > close the fd from the old notification group at which point it would
> > > receive all the pending events in the new group. I can even see us adding
> > > ioctl to the fanotify group which when called will result in the same
> > > behavior (i.e., all pending permission events will get the "retry"
> > > response). That way the new daemon could just take the old fd from the fd
> > > store and call this ioctl to receive all the pending events again.
> > >
> > > No need for the new default response. We probably need to provide a group
> > > feature flag for this so that userspace can safely query kernel support for
> > > this behavior but otherwise even that wouldn't be really needed.
> > >
> > > What do you guys think?
> >
> > With proper handover I am not sure why this is needed, because:
> > - new group gets fd from store and signals old group
> > - old group does not take any new event, completes in-flight events,
> >   signals back new group and exists
> > - new group starts processing events
> > - so why do we need a complex mechanism in kernel to do what can easily
> >   be done in usersapce?
>
> This works for clean handover (say service update) - no need for any
> mechanism (neither retry nor default response there). We are in agreement
> here. If retry is supported, it will make the handover somewhat simpler for
> userspace but that's not really substantial.
>
> > Also, regardless I think that we need the default response, because:
> > - groups starts, set default response to DENY and handsover fd to store
> > - if group crashes unexpectedly, access to all files is denied, which is
> >   exactly what we needed to do with the "moderated" mount
> > - it is similar to access to FUSE mount when server is down
>
> Yes, crashes are what I had in mind. With crashes you have nobody to
> cleanly handle events still pending for the old group and you have to solve
> it. Reporting FAN_DENY (through default response) is one way, what I
> suggest with retry has the advantage that userspace doesn't have to deal
> with spurious FAN_DENY errors in case of daemon crash. It is not a huge
> benefit (crashes should better be rare ;)) but it is IMO a benefit.
>
> Now regarding your comment about moderated mount: You are somewhat terse on
> details so let me try to fill in. First let's differentiate between service
> (daemon) and the notification group because they may have a different
> lifetime. So the service starts, creates a notification group, places mark
> on the sb with pre-content events. You didn't mention a mark in your
> description but until the mark is set, the group receives no events so
> there's nothing to respond to. Now if the service crashes there are two
> options:
>
> 1) You didn't save your group fd anywhere. In that case the group and mark
> is gone with the crash of the service, all accesses happening after this
> moment are allowed => not good. Whether we have default response or not
> doesn't really matter in this case for those few events that were possibly
> pending. Agreed so far?

Yes.

>
> 2) You've saved group fd to fd store. In this case the group is (from
> kernel POV) fully alive even after the service crash and the default
> response does not activate. The events will be queued to the group and
> waiting for reply. No access to the fs is allowed to happen which is good.

Well this was my mistake.
I somehow forgot that the events would be blocked in this case.
I had imagined the desired "moderated mount" behavior where
events would be auto denied.

But that would require more work.
It would require decoupling the "control" fd which identifies the group
and can be used to setup marks and for ioctls from the "queue" fd that
is used to reading events and writing responses.

> Eventually the new service starts and we are in the situation I describe 3
> paragraphs above about handling pending events.
>
> So if we'd implement resending of pending events after group closure, I
> don't see how default response (at least in its current form) would be
> useful for anything.
>
> Why I like the proposal of resending pending events:
> a) No spurious FAN_DENY errors in case of service crash
> b) No need for new concept (and API) for default response, just a feature
>    flag.
> c) With additional ioctl to trigger resending pending events without group
>    closure, the newly started service can simply reuse the
>    same notification group (even in case of old service crash) thus
>    inheriting all placed marks (which is something Ibrahim would like to
>    have).

Yes I see the benefits now.

>
> Regarding complexity of the approach I propose the attached (untested)
> patch should implement it and I don't think it is very complex logic...
> So what do you think now? :)
>

Does not look complex at all :)
so no objections.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-02 16:56         ` Amir Goldstein
@ 2025-07-03  7:08           ` Ibrahim Jirdeh
  2025-07-03  8:27             ` Amir Goldstein
  0 siblings, 1 reply; 24+ messages in thread
From: Ibrahim Jirdeh @ 2025-07-03  7:08 UTC (permalink / raw)
  To: amir73il; +Cc: ibrahimjirdeh, jack, josef, lesha, linux-fsdevel, sargun

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="y", Size: 1567 bytes --]

> On Wed, Jul 2, 2025 at 6:15 PM Jan Kara <jack@suse.cz> wrote:
> > Eventually the new service starts and we are in the situation I describe 3
> > paragraphs above about handling pending events.
> >
> > So if we'd implement resending of pending events after group closure, I
> > don't see how default response (at least in its current form) would be
> > useful for anything.
> >
> > Why I like the proposal of resending pending events:
> > a) No spurious FAN_DENY errors in case of service crash
> > b) No need for new concept (and API) for default response, just a feature
> >    flag.
> > c) With additional ioctl to trigger resending pending events without group
> >    closure, the newly started service can simply reuse the
> >    same notification group (even in case of old service crash) thus
> >    inheriting all placed marks (which is something Ibrahim would like to
> >    have).
>

I'm also a fan of the approach of support for resending pending events. As
mentioned exposing this behavior as an ioctl and thereby removing the need to
recreate fanotify group makes the usage a fair bit simpler for our case.

One basic question I have (mainly for understanding), is if the FAN_RETRY flag is
set in the proposed patch, in the case where there is one existing group being
closed (ie no handover setup), what would be the behavior for pending events?
Is it the same as now, events are allowed, just that they get resent once? I don't
think we would ever run into this with proper usage of the new functionality,
just curious.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-03  7:08           ` Ibrahim Jirdeh
@ 2025-07-03  8:27             ` Amir Goldstein
  2025-07-03 14:43               ` Jan Kara
  0 siblings, 1 reply; 24+ messages in thread
From: Amir Goldstein @ 2025-07-03  8:27 UTC (permalink / raw)
  To: Ibrahim Jirdeh; +Cc: jack, josef, lesha, linux-fsdevel, sargun

On Thu, Jul 3, 2025 at 9:10 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
>
> > On Wed, Jul 2, 2025 at 6:15 PM Jan Kara <jack@suse.cz> wrote:
> > > Eventually the new service starts and we are in the situation I describe 3
> > > paragraphs above about handling pending events.
> > >
> > > So if we'd implement resending of pending events after group closure, I
> > > don't see how default response (at least in its current form) would be
> > > useful for anything.
> > >
> > > Why I like the proposal of resending pending events:
> > > a) No spurious FAN_DENY errors in case of service crash
> > > b) No need for new concept (and API) for default response, just a feature
> > >    flag.
> > > c) With additional ioctl to trigger resending pending events without group
> > >    closure, the newly started service can simply reuse the
> > >    same notification group (even in case of old service crash) thus
> > >    inheriting all placed marks (which is something Ibrahim would like to
> > >    have).
> >
>
> I'm also a fan of the approach of support for resending pending events. As
> mentioned exposing this behavior as an ioctl and thereby removing the need to
> recreate fanotify group makes the usage a fair bit simpler for our case.
>
> One basic question I have (mainly for understanding), is if the FAN_RETRY flag is
> set in the proposed patch, in the case where there is one existing group being
> closed (ie no handover setup), what would be the behavior for pending events?
> Is it the same as now, events are allowed, just that they get resent once?

Yes, same as now.
Instead of replying FAN_ALLOW, syscall is being restarted
to check if a new watcher was added since this watcher took the event.

Wondering out loud:
Currently we order the marks on the mark obj_list,
within the same priority group, first-subscribed-last-handled.

I never stopped to think if this order made sense or not.
Seems like it was just the easier way to implement insert by priority order.

But if we order the marks first-subscribed-first-handled within the same
priority group, we won't need to restart the syscall to restart mark
list iteration.

The new group of the newly started daemon, will be next in the mark list
after the current stopped group returns FAN_ALLOW.
Isn't that a tad less intrusive handover then restarting the syscall
and causing a bunch of unrelated subsystems to observe the restart?

And I think that the first-subscribed-first-handled order makes a bit more
sense logically.
I mean, if admin really cares about making some super important security
group first, admin could make sure that its a priority service that
starts very early
admin cannot make sure that the important group starts last...

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-03  8:27             ` Amir Goldstein
@ 2025-07-03 14:43               ` Jan Kara
  2025-07-03 16:09                 ` Amir Goldstein
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kara @ 2025-07-03 14:43 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Ibrahim Jirdeh, jack, josef, lesha, linux-fsdevel, sargun

On Thu 03-07-25 10:27:17, Amir Goldstein wrote:
> On Thu, Jul 3, 2025 at 9:10 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> >
> > > On Wed, Jul 2, 2025 at 6:15 PM Jan Kara <jack@suse.cz> wrote:
> > > > Eventually the new service starts and we are in the situation I describe 3
> > > > paragraphs above about handling pending events.
> > > >
> > > > So if we'd implement resending of pending events after group closure, I
> > > > don't see how default response (at least in its current form) would be
> > > > useful for anything.
> > > >
> > > > Why I like the proposal of resending pending events:
> > > > a) No spurious FAN_DENY errors in case of service crash
> > > > b) No need for new concept (and API) for default response, just a feature
> > > >    flag.
> > > > c) With additional ioctl to trigger resending pending events without group
> > > >    closure, the newly started service can simply reuse the
> > > >    same notification group (even in case of old service crash) thus
> > > >    inheriting all placed marks (which is something Ibrahim would like to
> > > >    have).
> > >
> >
> > I'm also a fan of the approach of support for resending pending events. As
> > mentioned exposing this behavior as an ioctl and thereby removing the need to
> > recreate fanotify group makes the usage a fair bit simpler for our case.
> >
> > One basic question I have (mainly for understanding), is if the FAN_RETRY flag is
> > set in the proposed patch, in the case where there is one existing group being
> > closed (ie no handover setup), what would be the behavior for pending events?
> > Is it the same as now, events are allowed, just that they get resent once?
> 
> Yes, same as now.
> Instead of replying FAN_ALLOW, syscall is being restarted
> to check if a new watcher was added since this watcher took the event.

Yes, just it isn't the whole syscall that's restarted but only the
fsnotify() call.

> Wondering out loud:
> Currently we order the marks on the mark obj_list,
> within the same priority group, first-subscribed-last-handled.
> 
> I never stopped to think if this order made sense or not.
> Seems like it was just the easier way to implement insert by priority order.
> 
> But if we order the marks first-subscribed-first-handled within the same
> priority group, we won't need to restart the syscall to restart mark
> list iteration.
> 
> The new group of the newly started daemon, will be next in the mark list
> after the current stopped group returns FAN_ALLOW.
> Isn't that a tad less intrusive handover then restarting the syscall
> and causing a bunch of unrelated subsystems to observe the restart?
> 
> And I think that the first-subscribed-first-handled order makes a bit more
> sense logically.
> I mean, if admin really cares about making some super important security
> group first, admin could make sure that its a priority service that
> starts very early
> admin cannot make sure that the important group starts last...

So this idea also briefly crossed my mind yesterday but I didn't look into
it in detail. Looking at how we currently do mark iteration in fsnotify
this won't be very easy to implement. iter_info has an array of marks, some
of those are marks we are currently reporting to, some of those may be from
the next group to report to. Some may be even NULL because for this mark
type there were no more marks to report to. So it's difficult to make sure
iter_into will properly pick freshly added group and its marks without
completely restarting mark iteration. I'm not saying it cannot be done but
I'm not sure it's worth the hassle.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-03 14:43               ` Jan Kara
@ 2025-07-03 16:09                 ` Amir Goldstein
  2025-07-11 22:30                   ` Ibrahim Jirdeh
  0 siblings, 1 reply; 24+ messages in thread
From: Amir Goldstein @ 2025-07-03 16:09 UTC (permalink / raw)
  To: Jan Kara; +Cc: Ibrahim Jirdeh, josef, lesha, linux-fsdevel, sargun

On Thu, Jul 3, 2025 at 4:43 PM Jan Kara <jack@suse.cz> wrote:
>
> On Thu 03-07-25 10:27:17, Amir Goldstein wrote:
> > On Thu, Jul 3, 2025 at 9:10 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> > >
> > > > On Wed, Jul 2, 2025 at 6:15 PM Jan Kara <jack@suse.cz> wrote:
> > > > > Eventually the new service starts and we are in the situation I describe 3
> > > > > paragraphs above about handling pending events.
> > > > >
> > > > > So if we'd implement resending of pending events after group closure, I
> > > > > don't see how default response (at least in its current form) would be
> > > > > useful for anything.
> > > > >
> > > > > Why I like the proposal of resending pending events:
> > > > > a) No spurious FAN_DENY errors in case of service crash
> > > > > b) No need for new concept (and API) for default response, just a feature
> > > > >    flag.
> > > > > c) With additional ioctl to trigger resending pending events without group
> > > > >    closure, the newly started service can simply reuse the
> > > > >    same notification group (even in case of old service crash) thus
> > > > >    inheriting all placed marks (which is something Ibrahim would like to
> > > > >    have).
> > > >
> > >
> > > I'm also a fan of the approach of support for resending pending events. As
> > > mentioned exposing this behavior as an ioctl and thereby removing the need to
> > > recreate fanotify group makes the usage a fair bit simpler for our case.
> > >
> > > One basic question I have (mainly for understanding), is if the FAN_RETRY flag is
> > > set in the proposed patch, in the case where there is one existing group being
> > > closed (ie no handover setup), what would be the behavior for pending events?
> > > Is it the same as now, events are allowed, just that they get resent once?
> >
> > Yes, same as now.
> > Instead of replying FAN_ALLOW, syscall is being restarted
> > to check if a new watcher was added since this watcher took the event.
>
> Yes, just it isn't the whole syscall that's restarted but only the
> fsnotify() call.
>

Right. I missed that.

> > Wondering out loud:
> > Currently we order the marks on the mark obj_list,
> > within the same priority group, first-subscribed-last-handled.
> >
> > I never stopped to think if this order made sense or not.
> > Seems like it was just the easier way to implement insert by priority order.
> >
> > But if we order the marks first-subscribed-first-handled within the same
> > priority group, we won't need to restart the syscall to restart mark
> > list iteration.
> >
> > The new group of the newly started daemon, will be next in the mark list
> > after the current stopped group returns FAN_ALLOW.
> > Isn't that a tad less intrusive handover then restarting the syscall
> > and causing a bunch of unrelated subsystems to observe the restart?
> >
> > And I think that the first-subscribed-first-handled order makes a bit more
> > sense logically.
> > I mean, if admin really cares about making some super important security
> > group first, admin could make sure that its a priority service that
> > starts very early
> > admin cannot make sure that the important group starts last...
>
> So this idea also briefly crossed my mind yesterday but I didn't look into
> it in detail. Looking at how we currently do mark iteration in fsnotify
> this won't be very easy to implement. iter_info has an array of marks, some
> of those are marks we are currently reporting to, some of those may be from
> the next group to report to. Some may be even NULL because for this mark
> type there were no more marks to report to. So it's difficult to make sure
> iter_into will properly pick freshly added group and its marks without
> completely restarting mark iteration. I'm not saying it cannot be done but
> I'm not sure it's worth the hassle.
>

Yes, I see what you mean.
Restarting fsnotify() seems more sensible.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-03 16:09                 ` Amir Goldstein
@ 2025-07-11 22:30                   ` Ibrahim Jirdeh
  2025-07-12  8:08                     ` Amir Goldstein
  0 siblings, 1 reply; 24+ messages in thread
From: Ibrahim Jirdeh @ 2025-07-11 22:30 UTC (permalink / raw)
  To: amir73il; +Cc: ibrahimjirdeh, jack, josef, lesha, linux-fsdevel, sargun

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="y", Size: 2452 bytes --]

> On Thu, Jul 3, 2025 at 4:43 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Thu 03-07-25 10:27:17, Amir Goldstein wrote:
> > > On Thu, Jul 3, 2025 at 9:10 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> > > >
> > > > > On Wed, Jul 2, 2025 at 6:15 PM Jan Kara <jack@suse.cz> wrote:
> > > > > > Eventually the new service starts and we are in the situation I describe 3
> > > > > > paragraphs above about handling pending events.
> > > > > >
> > > > > > So if we'd implement resending of pending events after group closure, I
> > > > > > don't see how default response (at least in its current form) would be
> > > > > > useful for anything.
> > > > > >
> > > > > > Why I like the proposal of resending pending events:
> > > > > > a) No spurious FAN_DENY errors in case of service crash
> > > > > > b) No need for new concept (and API) for default response, just a feature
> > > > > >    flag.
> > > > > > c) With additional ioctl to trigger resending pending events without group
> > > > > >    closure, the newly started service can simply reuse the
> > > > > >    same notification group (even in case of old service crash) thus
> > > > > >    inheriting all placed marks (which is something Ibrahim would like to
> > > > > >    have).
> > > > >
> > > >
> > > > I'm also a fan of the approach of support for resending pending events. As
> > > > mentioned exposing this behavior as an ioctl and thereby removing the need to
> > > > recreate fanotify group makes the usage a fair bit simpler for our case.
> > > >
> > > > One basic question I have (mainly for understanding), is if the FAN_RETRY flag is
> > > > set in the proposed patch, in the case where there is one existing group being
> > > > closed (ie no handover setup), what would be the behavior for pending events?
> > > > Is it the same as now, events are allowed, just that they get resent once?
> > >
> > > Yes, same as now.
> > > Instead of replying FAN_ALLOW, syscall is being restarted
> > > to check if a new watcher was added since this watcher took the event.
> >
> > Yes, just it isn't the whole syscall that's restarted but only the
> > fsnotify() call.

I was trying out the resend patch Jan posted in this thread along with a
simple ioctl to trigger the resend flow - it worked well, any remaining
concerns with exposing this functionality? If not I could go ahead and
pull in Jan's change and post it with additional ioctl.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-11 22:30                   ` Ibrahim Jirdeh
@ 2025-07-12  8:08                     ` Amir Goldstein
  2025-07-14  7:01                       ` Ibrahim Jirdeh
  2025-07-14 17:25                       ` Jan Kara
  0 siblings, 2 replies; 24+ messages in thread
From: Amir Goldstein @ 2025-07-12  8:08 UTC (permalink / raw)
  To: Ibrahim Jirdeh; +Cc: jack, josef, lesha, linux-fsdevel, sargun

On Sat, Jul 12, 2025 at 12:37 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
>
> > On Thu, Jul 3, 2025 at 4:43 PM Jan Kara <jack@suse.cz> wrote:
> > >
> > > On Thu 03-07-25 10:27:17, Amir Goldstein wrote:
> > > > On Thu, Jul 3, 2025 at 9:10 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> > > > >
> > > > > > On Wed, Jul 2, 2025 at 6:15 PM Jan Kara <jack@suse.cz> wrote:
> > > > > > > Eventually the new service starts and we are in the situation I describe 3
> > > > > > > paragraphs above about handling pending events.
> > > > > > >
> > > > > > > So if we'd implement resending of pending events after group closure, I
> > > > > > > don't see how default response (at least in its current form) would be
> > > > > > > useful for anything.
> > > > > > >
> > > > > > > Why I like the proposal of resending pending events:
> > > > > > > a) No spurious FAN_DENY errors in case of service crash
> > > > > > > b) No need for new concept (and API) for default response, just a feature
> > > > > > >    flag.
> > > > > > > c) With additional ioctl to trigger resending pending events without group
> > > > > > >    closure, the newly started service can simply reuse the
> > > > > > >    same notification group (even in case of old service crash) thus
> > > > > > >    inheriting all placed marks (which is something Ibrahim would like to
> > > > > > >    have).
> > > > > >
> > > > >
> > > > > I'm also a fan of the approach of support for resending pending events. As
> > > > > mentioned exposing this behavior as an ioctl and thereby removing the need to
> > > > > recreate fanotify group makes the usage a fair bit simpler for our case.
> > > > >
> > > > > One basic question I have (mainly for understanding), is if the FAN_RETRY flag is
> > > > > set in the proposed patch, in the case where there is one existing group being
> > > > > closed (ie no handover setup), what would be the behavior for pending events?
> > > > > Is it the same as now, events are allowed, just that they get resent once?
> > > >
> > > > Yes, same as now.
> > > > Instead of replying FAN_ALLOW, syscall is being restarted
> > > > to check if a new watcher was added since this watcher took the event.
> > >
> > > Yes, just it isn't the whole syscall that's restarted but only the
> > > fsnotify() call.
>
> I was trying out the resend patch Jan posted in this thread along with a
> simple ioctl to trigger the resend flow - it worked well, any remaining
> concerns with exposing this functionality? If not I could go ahead and
> pull in Jan's change and post it with additional ioctl.

I do not have any concern about the retry behavior itself,
but about the ioctl, feature dependency and test matrix.

Regarding the ioctl, it occured to me that it may be a foot gun.
Once the new group interrupts all the in-flight events,
if due to a userland bug, this is done without full collaboration
with old group, there could be nasty races of both old and new
groups responding to the same event, and with recyclable
ida response ids that could cause a real mess.

Of course you can say it is userspace blame, but the smooth
handover from old service to new service instance is not always
easy to get right, hence, a foot gun.

If we implement the control-fd/queue-fd design, we would
not have this problem.
The ioctl to open an event-queue-fd would fail it a queue
handler fd is already open.
IOW, the handover is kernel controlled and much safer.
For the sake of discussion let's call this feature
FAN_CONTROL_FD and let it allow the ioctl
IOC_FAN_OPEN_QUEUE_FD.

The simpler functionality of FAN_RETRY_UNANSWERED
may be useful regardless of the handover ioctl (?), but if we
agree that the correct design for handover is the control fd design,
and this design will require a feature flag anyway,
then I don't think that we need two feature flags.

If users want the retry unanswered functionality, they can use the
new API for control fd, regardless of whether they intend to store
the fd and do handover or not.

The control fd API means that when a *queue* fd is released,
events remain in pending state until a new queue fd is opened
and can also imply the retry unanswered behavior,
when the *control* fd is released.

I don't think there is much to lose from this retry behavior.
The only reason we want to opt-in for it is to avoid surprises of
behavior change in existing deployments.

While we could have FAN_RETRY_UNANSWERED as an
independent feature without a handover ioctl,
In order to avoid test matrix bloat, at least for a start (we can relax later),
I don't think that we should allow it as an independent feature
and especially not for legacy modes (i.e. for Anti-Virus) unless there
is a concrete user requesting/testing these use cases.

Going on about feature dependency.

Practically, a handover ioctl is useless without
FAN_REPORT_RESPONSE_ID, so for sure we need to require
FAN_REPORT_RESPONSE_ID for the handover ioctl feature.

Because I do not see an immediate use case for
FAN_REPORT_RESPONSE_ID without handover,
I would start by only allowing them together and consider relaxing
later if such a use case is found.

I will even consider taking this further and start with
FAN_CLASS_PRE_CONTENT_FID requiring
both the new feature flags.

Currently my pre-dir-content patches allow reporting event->fd,
but that is only because they *can* not because they *should*.
Whether or not we want to allow pre-dir-content events with
event->fd is still an open question.

Until we have an answer to this question based on use case,
once again, I prefer the conservative way of merging the
maximal-restrictions/minimal-test-matrix and I would like to
require  FAN_REPORT_RESPONSE_ID and control fd for
FAN_CLASS_PRE_CONTENT_FID.

This rant became longer than I had intended.

Am I missing anything about meta use cases
or the risks in the resend pending events ioctl?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-12  8:08                     ` Amir Goldstein
@ 2025-07-14  7:01                       ` Ibrahim Jirdeh
  2025-07-14 15:59                         ` Amir Goldstein
  2025-07-14 17:25                       ` Jan Kara
  1 sibling, 1 reply; 24+ messages in thread
From: Ibrahim Jirdeh @ 2025-07-14  7:01 UTC (permalink / raw)
  To: amir73il; +Cc: ibrahimjirdeh, jack, josef, lesha, linux-fsdevel, sargun

On 7/12/25, 1:08 AM, "Amir Goldstein" <amir73il@gmail.com <mailto:amir73il@gmail.com>> wrote:

> Regarding the ioctl, it occured to me that it may be a foot gun.
> Once the new group interrupts all the in-flight events,
> if due to a userland bug, this is done without full collaboration
> with old group, there could be nasty races of both old and new
> groups responding to the same event, and with recyclable
> ida response ids that could cause a real mess.

Makes sense. I had only considered an "ideal" usage where the resend
ioctl is synchronized. Sounds reasonable to provide stronger guarantees
within the surfaced api.

> If we implement the control-fd/queue-fd design, we would
> not have this problem.
> The ioctl to open an event-queue-fd would fail it a queue
> handler fd is already open.

I had a few questions around the control-fd/queue-fd api you outlined.
Most basically, in the new design, do we now only allow reading events /
writing responses through the issued queue-fd.

> The control fd API means that when a *queue* fd is released,
> events remain in pending state until a new queue fd is opened
> and can also imply the retry unanswered behavior,
> when the *control* fd is released.

It may match what you are saying, but is it safe to simply trigger the
retry unanswered flow for pending events (events that are read but not
answered) on queue fd release. And similarly the control fd release would
just match the current release flow of allowing / resending all queued
events + destroying group.

And in terms of api usage does something like the following look
reasonable for the handover:

- Control fd is still kept in fd store just like current setup
- Queue fd is not. This way on daemon restart/crash we will always resend
any pending events via the queue fd release
- On daemon startup always call ioctl to reissue a new queue fd

> Because I do not see an immediate use case for
> FAN_REPORT_RESPONSE_ID without handover,
> I would start by only allowing them together and consider relaxing
> later if such a use case is found.
>
> I will even consider taking this further and start with
> FAN_CLASS_PRE_CONTENT_FID requiring
> both the new feature flags.

The feature dependence sounds reasonable to me. We will need both
FAN_REPORT_RESPONSE_ID and retry behavior + something like proposed
control fd api to robustly handle pending events.

> Am I missing anything about meta use cases
> or the risks in the resend pending events ioctl?

I don't think theres any other complications related to pending events in
our use case. And based on my understanding of the api you proposed, it
should address our case well. I can just briefly mention why its desirable
to have some mechanism to trigger resend while still using the same
group, I might have added this in a previous discussion. Apart from
interested (mounts of) directories, we are also adding ignore marks for
all populated files. So we would need to recreate this state if just
relying on retry behavior triggering on group close. Its doable on the
use case side but probably a bit tricky versus being able to continue
to use the existing group which has proper state.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-14  7:01                       ` Ibrahim Jirdeh
@ 2025-07-14 15:59                         ` Amir Goldstein
  2025-07-14 16:57                           ` Ibrahim Jirdeh
  0 siblings, 1 reply; 24+ messages in thread
From: Amir Goldstein @ 2025-07-14 15:59 UTC (permalink / raw)
  To: Ibrahim Jirdeh; +Cc: jack, josef, lesha, linux-fsdevel, sargun

On Mon, Jul 14, 2025 at 9:02 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
>
> On 7/12/25, 1:08 AM, "Amir Goldstein" <amir73il@gmail.com <mailto:amir73il@gmail.com>> wrote:
>
> > Regarding the ioctl, it occured to me that it may be a foot gun.
> > Once the new group interrupts all the in-flight events,
> > if due to a userland bug, this is done without full collaboration
> > with old group, there could be nasty races of both old and new
> > groups responding to the same event, and with recyclable
> > ida response ids that could cause a real mess.
>
> Makes sense. I had only considered an "ideal" usage where the resend
> ioctl is synchronized. Sounds reasonable to provide stronger guarantees
> within the surfaced api.
>
> > If we implement the control-fd/queue-fd design, we would
> > not have this problem.
> > The ioctl to open an event-queue-fd would fail it a queue
> > handler fd is already open.
>
> I had a few questions around the control-fd/queue-fd api you outlined.
> Most basically, in the new design, do we now only allow reading events /
> writing responses through the issued queue-fd.
>

Correct.

The fanotify control fd is what keeps the group object alive
and it is used for fanotify_mark() and for the ioctl that generated
the queue-fd.

The queue-fd is for fanotify_read (events) and fanotify_write
(responses).

> > The control fd API means that when a *queue* fd is released,
> > events remain in pending state until a new queue fd is opened
> > and can also imply the retry unanswered behavior,
> > when the *control* fd is released.
>
> It may match what you are saying, but is it safe to simply trigger the
> retry unanswered flow for pending events (events that are read but not
> answered) on queue fd release.

Yes you are right. This makes sense.
I did not say this correctly. I wrote it more accurately.

> And similarly the control fd release would
> just match the current release flow of allowing / resending all queued
> events + destroying group.

Yes, that allows a handover without a fd store.
- Start new group, setup marks, open queue fd, don't read from it
- Stop old group
- New group starts reading events (including the resent ones)

>
> And in terms of api usage does something like the following look
> reasonable for the handover:
>
> - Control fd is still kept in fd store just like current setup
> - Queue fd is not. This way on daemon restart/crash we will always resend
> any pending events via the queue fd release
> - On daemon startup always call ioctl to reissue a new queue fd
>

Yes. Exactly. sounds simple and natural.
There may be complications, but I do not see them yet.

> > Because I do not see an immediate use case for
> > FAN_REPORT_RESPONSE_ID without handover,
> > I would start by only allowing them together and consider relaxing
> > later if such a use case is found.
> >
> > I will even consider taking this further and start with
> > FAN_CLASS_PRE_CONTENT_FID requiring
> > both the new feature flags.
>
> The feature dependence sounds reasonable to me. We will need both
> FAN_REPORT_RESPONSE_ID and retry behavior + something like proposed
> control fd api to robustly handle pending events.
>
> > Am I missing anything about meta use cases
> > or the risks in the resend pending events ioctl?
>
> I don't think theres any other complications related to pending events in
> our use case. And based on my understanding of the api you proposed, it
> should address our case well. I can just briefly mention why its desirable
> to have some mechanism to trigger resend while still using the same
> group, I might have added this in a previous discussion. Apart from
> interested (mounts of) directories, we are also adding ignore marks for
> all populated files. So we would need to recreate this state if just
> relying on retry behavior triggering on group close. Its doable on the
> use case side but probably a bit tricky versus being able to continue
> to use the existing group which has proper state.

I needed no explanation - this was clear to me but maybe
someone else did so good that you wrote it ;)

But I think that besides the convenience of keeping the marks,
it is not really doable to restart the service with guarantee that:
- You won't lose any event
- User will not be denied access between services

Especially, if the old service instance could be hung and killed by a watchdog,
IMO the restart mechanism is a must for a smooth and safe handover.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-14 15:59                         ` Amir Goldstein
@ 2025-07-14 16:57                           ` Ibrahim Jirdeh
  0 siblings, 0 replies; 24+ messages in thread
From: Ibrahim Jirdeh @ 2025-07-14 16:57 UTC (permalink / raw)
  To: amir73il; +Cc: ibrahimjirdeh, jack, josef, lesha, linux-fsdevel, sargun

This all sounds great. I can work on the control fd API and post it
soon for further discussion.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-12  8:08                     ` Amir Goldstein
  2025-07-14  7:01                       ` Ibrahim Jirdeh
@ 2025-07-14 17:25                       ` Jan Kara
  2025-07-14 19:59                         ` Amir Goldstein
  1 sibling, 1 reply; 24+ messages in thread
From: Jan Kara @ 2025-07-14 17:25 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Ibrahim Jirdeh, jack, josef, lesha, linux-fsdevel, sargun

On Sat 12-07-25 10:08:25, Amir Goldstein wrote:
> On Sat, Jul 12, 2025 at 12:37 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> >
> > > On Thu, Jul 3, 2025 at 4:43 PM Jan Kara <jack@suse.cz> wrote:
> > > >
> > > > On Thu 03-07-25 10:27:17, Amir Goldstein wrote:
> > > > > On Thu, Jul 3, 2025 at 9:10 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> > > > > >
> > > > > > > On Wed, Jul 2, 2025 at 6:15 PM Jan Kara <jack@suse.cz> wrote:
> > > > > > > > Eventually the new service starts and we are in the situation I describe 3
> > > > > > > > paragraphs above about handling pending events.
> > > > > > > >
> > > > > > > > So if we'd implement resending of pending events after group closure, I
> > > > > > > > don't see how default response (at least in its current form) would be
> > > > > > > > useful for anything.
> > > > > > > >
> > > > > > > > Why I like the proposal of resending pending events:
> > > > > > > > a) No spurious FAN_DENY errors in case of service crash
> > > > > > > > b) No need for new concept (and API) for default response, just a feature
> > > > > > > >    flag.
> > > > > > > > c) With additional ioctl to trigger resending pending events without group
> > > > > > > >    closure, the newly started service can simply reuse the
> > > > > > > >    same notification group (even in case of old service crash) thus
> > > > > > > >    inheriting all placed marks (which is something Ibrahim would like to
> > > > > > > >    have).
> > > > > > >
> > > > > >
> > > > > > I'm also a fan of the approach of support for resending pending events. As
> > > > > > mentioned exposing this behavior as an ioctl and thereby removing the need to
> > > > > > recreate fanotify group makes the usage a fair bit simpler for our case.
> > > > > >
> > > > > > One basic question I have (mainly for understanding), is if the FAN_RETRY flag is
> > > > > > set in the proposed patch, in the case where there is one existing group being
> > > > > > closed (ie no handover setup), what would be the behavior for pending events?
> > > > > > Is it the same as now, events are allowed, just that they get resent once?
> > > > >
> > > > > Yes, same as now.
> > > > > Instead of replying FAN_ALLOW, syscall is being restarted
> > > > > to check if a new watcher was added since this watcher took the event.
> > > >
> > > > Yes, just it isn't the whole syscall that's restarted but only the
> > > > fsnotify() call.
> >
> > I was trying out the resend patch Jan posted in this thread along with a
> > simple ioctl to trigger the resend flow - it worked well, any remaining
> > concerns with exposing this functionality? If not I could go ahead and
> > pull in Jan's change and post it with additional ioctl.
> 
> I do not have any concern about the retry behavior itself,
> but about the ioctl, feature dependency and test matrix.
> 
> Regarding the ioctl, it occured to me that it may be a foot gun.
> Once the new group interrupts all the in-flight events,
> if due to a userland bug, this is done without full collaboration
> with old group, there could be nasty races of both old and new
> groups responding to the same event, and with recyclable
> ida response ids that could cause a real mess.
> 
> Of course you can say it is userspace blame, but the smooth
> handover from old service to new service instance is not always
> easy to get right, hence, a foot gun.
> 
> If we implement the control-fd/queue-fd design, we would
> not have this problem.
> The ioctl to open an event-queue-fd would fail it a queue
> handler fd is already open.
> IOW, the handover is kernel controlled and much safer.
> For the sake of discussion let's call this feature
> FAN_CONTROL_FD and let it allow the ioctl
> IOC_FAN_OPEN_QUEUE_FD.

I agree this is probably a safer variant.

> The simpler functionality of FAN_RETRY_UNANSWERED
> may be useful regardless of the handover ioctl (?), but if we
> agree that the correct design for handover is the control fd design,
> and this design will require a feature flag anyway,
> then I don't think that we need two feature flags.
> 
> If users want the retry unanswered functionality, they can use the
> new API for control fd, regardless of whether they intend to store
> the fd and do handover or not.
> 
> The control fd API means that when a *queue* fd is released,
> events remain in pending state until a new queue fd is opened
> and can also imply the retry unanswered behavior,
> when the *control* fd is released.

Right, given with queue-fd design we actually have clear "successor" fd
where to report already reported but not answered events. So we can just
move back unanswered events on close of old queue fd so they'd be reported
again on read from the new queue fd. So there will be no need to bother
other notification groups with resent events with this design.
 
> I don't think there is much to lose from this retry behavior.
> The only reason we want to opt-in for it is to avoid surprises of
> behavior change in existing deployments.
> 
> While we could have FAN_RETRY_UNANSWERED as an
> independent feature without a handover ioctl,
> In order to avoid test matrix bloat, at least for a start (we can relax later),
> I don't think that we should allow it as an independent feature
> and especially not for legacy modes (i.e. for Anti-Virus) unless there
> is a concrete user requesting/testing these use cases.

With queue-fd design I agree there's no reason not to have the "resend
pending events" behavior from the start.

> Going on about feature dependency.
> 
> Practically, a handover ioctl is useless without
> FAN_REPORT_RESPONSE_ID, so for sure we need to require
> FAN_REPORT_RESPONSE_ID for the handover ioctl feature.
> 
> Because I do not see an immediate use case for
> FAN_REPORT_RESPONSE_ID without handover,
> I would start by only allowing them together and consider relaxing
> later if such a use case is found.

We can tie them together but I think queue-fd design doesn't require
FAN_REPORT_RESPONSE_ID. Since we resend events anyway, we'd generate new
fds for events as well and things would just work AFAICT.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-14 17:25                       ` Jan Kara
@ 2025-07-14 19:59                         ` Amir Goldstein
  2025-07-15 12:11                           ` Jan Kara
  0 siblings, 1 reply; 24+ messages in thread
From: Amir Goldstein @ 2025-07-14 19:59 UTC (permalink / raw)
  To: Jan Kara; +Cc: Ibrahim Jirdeh, josef, lesha, linux-fsdevel, sargun

On Mon, Jul 14, 2025 at 7:25 PM Jan Kara <jack@suse.cz> wrote:
>
> On Sat 12-07-25 10:08:25, Amir Goldstein wrote:
> > On Sat, Jul 12, 2025 at 12:37 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> > >
> > > > On Thu, Jul 3, 2025 at 4:43 PM Jan Kara <jack@suse.cz> wrote:
> > > > >
> > > > > On Thu 03-07-25 10:27:17, Amir Goldstein wrote:
> > > > > > On Thu, Jul 3, 2025 at 9:10 AM Ibrahim Jirdeh <ibrahimjirdeh@meta.com> wrote:
> > > > > > >
> > > > > > > > On Wed, Jul 2, 2025 at 6:15 PM Jan Kara <jack@suse.cz> wrote:
> > > > > > > > > Eventually the new service starts and we are in the situation I describe 3
> > > > > > > > > paragraphs above about handling pending events.
> > > > > > > > >
> > > > > > > > > So if we'd implement resending of pending events after group closure, I
> > > > > > > > > don't see how default response (at least in its current form) would be
> > > > > > > > > useful for anything.
> > > > > > > > >
> > > > > > > > > Why I like the proposal of resending pending events:
> > > > > > > > > a) No spurious FAN_DENY errors in case of service crash
> > > > > > > > > b) No need for new concept (and API) for default response, just a feature
> > > > > > > > >    flag.
> > > > > > > > > c) With additional ioctl to trigger resending pending events without group
> > > > > > > > >    closure, the newly started service can simply reuse the
> > > > > > > > >    same notification group (even in case of old service crash) thus
> > > > > > > > >    inheriting all placed marks (which is something Ibrahim would like to
> > > > > > > > >    have).
> > > > > > > >
> > > > > > >
> > > > > > > I'm also a fan of the approach of support for resending pending events. As
> > > > > > > mentioned exposing this behavior as an ioctl and thereby removing the need to
> > > > > > > recreate fanotify group makes the usage a fair bit simpler for our case.
> > > > > > >
> > > > > > > One basic question I have (mainly for understanding), is if the FAN_RETRY flag is
> > > > > > > set in the proposed patch, in the case where there is one existing group being
> > > > > > > closed (ie no handover setup), what would be the behavior for pending events?
> > > > > > > Is it the same as now, events are allowed, just that they get resent once?
> > > > > >
> > > > > > Yes, same as now.
> > > > > > Instead of replying FAN_ALLOW, syscall is being restarted
> > > > > > to check if a new watcher was added since this watcher took the event.
> > > > >
> > > > > Yes, just it isn't the whole syscall that's restarted but only the
> > > > > fsnotify() call.
> > >
> > > I was trying out the resend patch Jan posted in this thread along with a
> > > simple ioctl to trigger the resend flow - it worked well, any remaining
> > > concerns with exposing this functionality? If not I could go ahead and
> > > pull in Jan's change and post it with additional ioctl.
> >
> > I do not have any concern about the retry behavior itself,
> > but about the ioctl, feature dependency and test matrix.
> >
> > Regarding the ioctl, it occured to me that it may be a foot gun.
> > Once the new group interrupts all the in-flight events,
> > if due to a userland bug, this is done without full collaboration
> > with old group, there could be nasty races of both old and new
> > groups responding to the same event, and with recyclable
> > ida response ids that could cause a real mess.
> >
> > Of course you can say it is userspace blame, but the smooth
> > handover from old service to new service instance is not always
> > easy to get right, hence, a foot gun.
> >
> > If we implement the control-fd/queue-fd design, we would
> > not have this problem.
> > The ioctl to open an event-queue-fd would fail it a queue
> > handler fd is already open.
> > IOW, the handover is kernel controlled and much safer.
> > For the sake of discussion let's call this feature
> > FAN_CONTROL_FD and let it allow the ioctl
> > IOC_FAN_OPEN_QUEUE_FD.
>
> I agree this is probably a safer variant.
>
> > The simpler functionality of FAN_RETRY_UNANSWERED
> > may be useful regardless of the handover ioctl (?), but if we
> > agree that the correct design for handover is the control fd design,
> > and this design will require a feature flag anyway,
> > then I don't think that we need two feature flags.
> >
> > If users want the retry unanswered functionality, they can use the
> > new API for control fd, regardless of whether they intend to store
> > the fd and do handover or not.
> >
> > The control fd API means that when a *queue* fd is released,
> > events remain in pending state until a new queue fd is opened
> > and can also imply the retry unanswered behavior,
> > when the *control* fd is released.
>
> Right, given with queue-fd design we actually have clear "successor" fd
> where to report already reported but not answered events. So we can just
> move back unanswered events on close of old queue fd so they'd be reported
> again on read from the new queue fd. So there will be no need to bother
> other notification groups with resent events with this design.
>

Yes,  we'd need to be careful with this new event state transition
to make sure that we "recycle" the event properly.

> > I don't think there is much to lose from this retry behavior.
> > The only reason we want to opt-in for it is to avoid surprises of
> > behavior change in existing deployments.
> >
> > While we could have FAN_RETRY_UNANSWERED as an
> > independent feature without a handover ioctl,
> > In order to avoid test matrix bloat, at least for a start (we can relax later),
> > I don't think that we should allow it as an independent feature
> > and especially not for legacy modes (i.e. for Anti-Virus) unless there
> > is a concrete user requesting/testing these use cases.
>
> With queue-fd design I agree there's no reason not to have the "resend
> pending events" behavior from the start.
>
> > Going on about feature dependency.
> >
> > Practically, a handover ioctl is useless without
> > FAN_REPORT_RESPONSE_ID, so for sure we need to require
> > FAN_REPORT_RESPONSE_ID for the handover ioctl feature.
> >
> > Because I do not see an immediate use case for
> > FAN_REPORT_RESPONSE_ID without handover,
> > I would start by only allowing them together and consider relaxing
> > later if such a use case is found.
>
> We can tie them together but I think queue-fd design doesn't require
> FAN_REPORT_RESPONSE_ID. Since we resend events anyway, we'd generate new
> fds for events as well and things would just work AFAICT.
>

Right. hmm.
I'd still like to consider the opportunity of the new-ish API for
deprecating some old legacy API baggage.

For example: do you consider the fact that async events are mixed
together with permission/pre-content events in the same queue
an historic mistake or a feature?

I'm tempted, as we split the control fd from the queue fd to limit
a queue to events of the same "type".
Maybe an O_RDONLY async queue fd
or an O_RDWR permission events queue fd
or only allow the latter with the new API?

I am not sure what the best semantics would be and I'd hate to
send Ibrahim on another walk into the woods with this added
requirement.

WDYT? Is it worth adding some limitations that may be relaxed later?
which? I'd be ok with limiting to permission events only for a start.

Ibrahim,

Please note that FAN_CLOEXEC and FAN_NONBLOCK
currently apply to the control fd.
I guess they can also apply to the queue fd,
Anyway the control fd should not be O_RDWR
probably O_RDONLY even though it's not for reading.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-14 19:59                         ` Amir Goldstein
@ 2025-07-15 12:11                           ` Jan Kara
  2025-07-15 14:50                             ` Amir Goldstein
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kara @ 2025-07-15 12:11 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Jan Kara, Ibrahim Jirdeh, josef, lesha, linux-fsdevel, sargun

On Mon 14-07-25 21:59:22, Amir Goldstein wrote:
> On Mon, Jul 14, 2025 at 7:25 PM Jan Kara <jack@suse.cz> wrote:
> > > I don't think there is much to lose from this retry behavior.
> > > The only reason we want to opt-in for it is to avoid surprises of
> > > behavior change in existing deployments.
> > >
> > > While we could have FAN_RETRY_UNANSWERED as an
> > > independent feature without a handover ioctl,
> > > In order to avoid test matrix bloat, at least for a start (we can relax later),
> > > I don't think that we should allow it as an independent feature
> > > and especially not for legacy modes (i.e. for Anti-Virus) unless there
> > > is a concrete user requesting/testing these use cases.
> >
> > With queue-fd design I agree there's no reason not to have the "resend
> > pending events" behavior from the start.
> >
> > > Going on about feature dependency.
> > >
> > > Practically, a handover ioctl is useless without
> > > FAN_REPORT_RESPONSE_ID, so for sure we need to require
> > > FAN_REPORT_RESPONSE_ID for the handover ioctl feature.
> > >
> > > Because I do not see an immediate use case for
> > > FAN_REPORT_RESPONSE_ID without handover,
> > > I would start by only allowing them together and consider relaxing
> > > later if such a use case is found.
> >
> > We can tie them together but I think queue-fd design doesn't require
> > FAN_REPORT_RESPONSE_ID. Since we resend events anyway, we'd generate new
> > fds for events as well and things would just work AFAICT.
> >
> 
> Right. hmm.
> I'd still like to consider the opportunity of the new-ish API for
> deprecating some old legacy API baggage.
> 
> For example: do you consider the fact that async events are mixed
> together with permission/pre-content events in the same queue
> an historic mistake or a feature?
 
So far I do consider it a feature although not a particularly useful one.
Given fanotify doesn't guarantee ordering of events and async events can
arbitrarily skip over the permission events there's no substantial
difference to having two notification groups. If you can make a good case
of what would be simpler if we disallowed that I'm open to consider
disallowing that.

> I'm tempted, as we split the control fd from the queue fd to limit
> a queue to events of the same "type".
> Maybe an O_RDONLY async queue fd
> or an O_RDWR permission events queue fd
> or only allow the latter with the new API?

There's value in restricting unneeded functionality and there's also value
in having features not influencing one another so the balance has to be
found :). So far I don't find that disallowing async events with queue fd
or restricting fd mode for queue fd would simplify our life in a
significant way but maybe I'm missing something.

> Please note that FAN_CLOEXEC and FAN_NONBLOCK
> currently apply to the control fd.
> I guess they can also apply to the queue fd,
> Anyway the control fd should not be O_RDWR
> probably O_RDONLY even though it's not for reading.

I guess the ioctl to create queue fd can take desired flags for the queue
fd so we don't have to second guess what the application wants.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-15 12:11                           ` Jan Kara
@ 2025-07-15 14:50                             ` Amir Goldstein
  2025-07-16  8:55                               ` Jan Kara
  0 siblings, 1 reply; 24+ messages in thread
From: Amir Goldstein @ 2025-07-15 14:50 UTC (permalink / raw)
  To: Jan Kara; +Cc: Ibrahim Jirdeh, josef, lesha, linux-fsdevel, sargun

On Tue, Jul 15, 2025 at 2:11 PM Jan Kara <jack@suse.cz> wrote:
>
> On Mon 14-07-25 21:59:22, Amir Goldstein wrote:
> > On Mon, Jul 14, 2025 at 7:25 PM Jan Kara <jack@suse.cz> wrote:
> > > > I don't think there is much to lose from this retry behavior.
> > > > The only reason we want to opt-in for it is to avoid surprises of
> > > > behavior change in existing deployments.
> > > >
> > > > While we could have FAN_RETRY_UNANSWERED as an
> > > > independent feature without a handover ioctl,
> > > > In order to avoid test matrix bloat, at least for a start (we can relax later),
> > > > I don't think that we should allow it as an independent feature
> > > > and especially not for legacy modes (i.e. for Anti-Virus) unless there
> > > > is a concrete user requesting/testing these use cases.
> > >
> > > With queue-fd design I agree there's no reason not to have the "resend
> > > pending events" behavior from the start.
> > >
> > > > Going on about feature dependency.
> > > >
> > > > Practically, a handover ioctl is useless without
> > > > FAN_REPORT_RESPONSE_ID, so for sure we need to require
> > > > FAN_REPORT_RESPONSE_ID for the handover ioctl feature.
> > > >
> > > > Because I do not see an immediate use case for
> > > > FAN_REPORT_RESPONSE_ID without handover,
> > > > I would start by only allowing them together and consider relaxing
> > > > later if such a use case is found.
> > >
> > > We can tie them together but I think queue-fd design doesn't require
> > > FAN_REPORT_RESPONSE_ID. Since we resend events anyway, we'd generate new
> > > fds for events as well and things would just work AFAICT.
> > >
> >
> > Right. hmm.
> > I'd still like to consider the opportunity of the new-ish API for
> > deprecating some old legacy API baggage.
> >
> > For example: do you consider the fact that async events are mixed
> > together with permission/pre-content events in the same queue
> > an historic mistake or a feature?
>
> So far I do consider it a feature although not a particularly useful one.
> Given fanotify doesn't guarantee ordering of events and async events can
> arbitrarily skip over the permission events there's no substantial
> difference to having two notification groups.

I am not sure I understand your claim.
It sounds like you are arguing for my case, because mixing
mergeable async events and non-mergeable permission events
in the same queue can be very confusing.
Therefore, I consider it an anti-feature.
Users can get the same functionality from having two groups,
with much more sane semantics.

> If you can make a good case
> of what would be simpler if we disallowed that I'm open to consider
> disallowing that.
>

Clearly, handling permission events should have had higher priority,
but we do not have a priority queue, nor do we provide any means
for userspace to handle permission events before async events.

Nudging users to separate the events of different urgency to
different queues/groups can allow userspace to read from the
higher priority fd first as they should.

> > I'm tempted, as we split the control fd from the queue fd to limit
> > a queue to events of the same "type".
> > Maybe an O_RDONLY async queue fd
> > or an O_RDWR permission events queue fd
> > or only allow the latter with the new API?
>
> There's value in restricting unneeded functionality and there's also value
> in having features not influencing one another so the balance has to be
> found :).

<nod>

> So far I don't find that disallowing async events with queue fd
> or restricting fd mode for queue fd would simplify our life in a
> significant way but maybe I'm missing something.
>

It only simplifies our life if we are going to be honest about test coverage.
When we return a pending permission events to the head of the queue
when queue fd is closed, does it matter if the queue has async events
or other permission events or a mix of them?

Logically, it shouldn't matter, but assuming that for tests is not the
best practice.
Thus, reducing the test matrix for a new feature by removing a
configuration that
I consider misguided feels like a good idea, but I am also feeling
that my opinion
on this matter is very biased, so I'd be happy if you can provide a more
objective decision about the restrictions.

> > Please note that FAN_CLOEXEC and FAN_NONBLOCK
> > currently apply to the control fd.
> > I guess they can also apply to the queue fd,
> > Anyway the control fd should not be O_RDWR
> > probably O_RDONLY even though it's not for reading.
>
> I guess the ioctl to create queue fd can take desired flags for the queue
> fd so we don't have to second guess what the application wants.
>

Correct, but note that FAN_NONBLOCK is meaningless
to the io-less control fd.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-15 14:50                             ` Amir Goldstein
@ 2025-07-16  8:55                               ` Jan Kara
  2025-07-16 10:31                                 ` Amir Goldstein
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kara @ 2025-07-16  8:55 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Jan Kara, Ibrahim Jirdeh, josef, lesha, linux-fsdevel, sargun

On Tue 15-07-25 16:50:00, Amir Goldstein wrote:
> On Tue, Jul 15, 2025 at 2:11 PM Jan Kara <jack@suse.cz> wrote:
> > On Mon 14-07-25 21:59:22, Amir Goldstein wrote:
> > > On Mon, Jul 14, 2025 at 7:25 PM Jan Kara <jack@suse.cz> wrote:
> > > > > I don't think there is much to lose from this retry behavior.
> > > > > The only reason we want to opt-in for it is to avoid surprises of
> > > > > behavior change in existing deployments.
> > > > >
> > > > > While we could have FAN_RETRY_UNANSWERED as an
> > > > > independent feature without a handover ioctl,
> > > > > In order to avoid test matrix bloat, at least for a start (we can relax later),
> > > > > I don't think that we should allow it as an independent feature
> > > > > and especially not for legacy modes (i.e. for Anti-Virus) unless there
> > > > > is a concrete user requesting/testing these use cases.
> > > >
> > > > With queue-fd design I agree there's no reason not to have the "resend
> > > > pending events" behavior from the start.
> > > >
> > > > > Going on about feature dependency.
> > > > >
> > > > > Practically, a handover ioctl is useless without
> > > > > FAN_REPORT_RESPONSE_ID, so for sure we need to require
> > > > > FAN_REPORT_RESPONSE_ID for the handover ioctl feature.
> > > > >
> > > > > Because I do not see an immediate use case for
> > > > > FAN_REPORT_RESPONSE_ID without handover,
> > > > > I would start by only allowing them together and consider relaxing
> > > > > later if such a use case is found.
> > > >
> > > > We can tie them together but I think queue-fd design doesn't require
> > > > FAN_REPORT_RESPONSE_ID. Since we resend events anyway, we'd generate new
> > > > fds for events as well and things would just work AFAICT.
> > > >
> > >
> > > Right. hmm.
> > > I'd still like to consider the opportunity of the new-ish API for
> > > deprecating some old legacy API baggage.
> > >
> > > For example: do you consider the fact that async events are mixed
> > > together with permission/pre-content events in the same queue
> > > an historic mistake or a feature?
> >
> > So far I do consider it a feature although not a particularly useful one.
> > Given fanotify doesn't guarantee ordering of events and async events can
> > arbitrarily skip over the permission events there's no substantial
> > difference to having two notification groups.
> 
> I am not sure I understand your claim.
> It sounds like you are arguing for my case, because mixing
> mergeable async events and non-mergeable permission events
> in the same queue can be very confusing.
> Therefore, I consider it an anti-feature.
> Users can get the same functionality from having two groups,
> with much more sane semantics.

I'd say with the same semantics but less expectations about possibly nicer
semantics :). But we agree two groups are a cleaner way to achieve the
same and give userspace more flexibility with handling the events at the
same time.
 
> > > I'm tempted, as we split the control fd from the queue fd to limit
> > > a queue to events of the same "type".
> > > Maybe an O_RDONLY async queue fd
> > > or an O_RDWR permission events queue fd
> > > or only allow the latter with the new API?
> >
> > There's value in restricting unneeded functionality and there's also value
> > in having features not influencing one another so the balance has to be
> > found :).
> 
> <nod>
> 
> > So far I don't find that disallowing async events with queue fd
> > or restricting fd mode for queue fd would simplify our life in a
> > significant way but maybe I'm missing something.
> >
> 
> It only simplifies our life if we are going to be honest about test coverage.
> When we return a pending permission events to the head of the queue
> when queue fd is closed, does it matter if the queue has async events
> or other permission events or a mix of them?
> 
> Logically, it shouldn't matter, but assuming that for tests is not the
> best practice.

Agreed.

> Thus, reducing the test matrix for a new feature by removing a
> configuration that I consider misguided feels like a good idea, but I am
> also feeling that my opinion on this matter is very biased, so I'd be
> happy if you can provide a more objective decision about the
> restrictions.

Heh, nobody is objective :). We are all biased by our past experiences.
Which is why discussing things together is so beneficial. I agree about
the benefit of simplier testing and that hardly anybody is going to miss
the functionality, what still isn't 100% clear to me how the restrictions
would be implemented in the API. So user creates fanotify_group() with
control fd feature flag. Now will he have to specify in advance whether the
group is for permission events or not? Already when creating the group
(i.e., another feature flag? Or infered from group priority?)? Or when
creating queue fd? Or will the group switch based on marks being placed to
it? I think explicit selection is less confusing than implicit by placed
marks. Using group priority looks appealing at first sight but currently
group priorities also have implications about order in which we deliver
events (both async and permission) to groups and it would be also somewhat
confusing that FAN_CLASS_PRE_CONTENT groups may still likely use
FAN_OPEN_PERM event. So maybe it's not that great fit. Hum... Or we can
have a single feature flag (good name needed!) that will create group with
control fd *and* restricted to permission events only. That actually seems
like the least confusing option to me now.
 
								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-16  8:55                               ` Jan Kara
@ 2025-07-16 10:31                                 ` Amir Goldstein
  2025-07-21 12:56                                   ` Jan Kara
  0 siblings, 1 reply; 24+ messages in thread
From: Amir Goldstein @ 2025-07-16 10:31 UTC (permalink / raw)
  To: Jan Kara; +Cc: Ibrahim Jirdeh, josef, lesha, linux-fsdevel, sargun

On Wed, Jul 16, 2025 at 10:55 AM Jan Kara <jack@suse.cz> wrote:
>
> On Tue 15-07-25 16:50:00, Amir Goldstein wrote:
> > On Tue, Jul 15, 2025 at 2:11 PM Jan Kara <jack@suse.cz> wrote:
> > > On Mon 14-07-25 21:59:22, Amir Goldstein wrote:
> > > > On Mon, Jul 14, 2025 at 7:25 PM Jan Kara <jack@suse.cz> wrote:
> > > > > > I don't think there is much to lose from this retry behavior.
> > > > > > The only reason we want to opt-in for it is to avoid surprises of
> > > > > > behavior change in existing deployments.
> > > > > >
> > > > > > While we could have FAN_RETRY_UNANSWERED as an
> > > > > > independent feature without a handover ioctl,
> > > > > > In order to avoid test matrix bloat, at least for a start (we can relax later),
> > > > > > I don't think that we should allow it as an independent feature
> > > > > > and especially not for legacy modes (i.e. for Anti-Virus) unless there
> > > > > > is a concrete user requesting/testing these use cases.
> > > > >
> > > > > With queue-fd design I agree there's no reason not to have the "resend
> > > > > pending events" behavior from the start.
> > > > >
> > > > > > Going on about feature dependency.
> > > > > >
> > > > > > Practically, a handover ioctl is useless without
> > > > > > FAN_REPORT_RESPONSE_ID, so for sure we need to require
> > > > > > FAN_REPORT_RESPONSE_ID for the handover ioctl feature.
> > > > > >
> > > > > > Because I do not see an immediate use case for
> > > > > > FAN_REPORT_RESPONSE_ID without handover,
> > > > > > I would start by only allowing them together and consider relaxing
> > > > > > later if such a use case is found.
> > > > >
> > > > > We can tie them together but I think queue-fd design doesn't require
> > > > > FAN_REPORT_RESPONSE_ID. Since we resend events anyway, we'd generate new
> > > > > fds for events as well and things would just work AFAICT.
> > > > >
> > > >
> > > > Right. hmm.
> > > > I'd still like to consider the opportunity of the new-ish API for
> > > > deprecating some old legacy API baggage.
> > > >
> > > > For example: do you consider the fact that async events are mixed
> > > > together with permission/pre-content events in the same queue
> > > > an historic mistake or a feature?
> > >
> > > So far I do consider it a feature although not a particularly useful one.
> > > Given fanotify doesn't guarantee ordering of events and async events can
> > > arbitrarily skip over the permission events there's no substantial
> > > difference to having two notification groups.
> >
> > I am not sure I understand your claim.
> > It sounds like you are arguing for my case, because mixing
> > mergeable async events and non-mergeable permission events
> > in the same queue can be very confusing.
> > Therefore, I consider it an anti-feature.
> > Users can get the same functionality from having two groups,
> > with much more sane semantics.
>
> I'd say with the same semantics but less expectations about possibly nicer
> semantics :). But we agree two groups are a cleaner way to achieve the
> same and give userspace more flexibility with handling the events at the
> same time.
>
> > > > I'm tempted, as we split the control fd from the queue fd to limit
> > > > a queue to events of the same "type".
> > > > Maybe an O_RDONLY async queue fd
> > > > or an O_RDWR permission events queue fd
> > > > or only allow the latter with the new API?
> > >
> > > There's value in restricting unneeded functionality and there's also value
> > > in having features not influencing one another so the balance has to be
> > > found :).
> >
> > <nod>
> >
> > > So far I don't find that disallowing async events with queue fd
> > > or restricting fd mode for queue fd would simplify our life in a
> > > significant way but maybe I'm missing something.
> > >
> >
> > It only simplifies our life if we are going to be honest about test coverage.
> > When we return a pending permission events to the head of the queue
> > when queue fd is closed, does it matter if the queue has async events
> > or other permission events or a mix of them?
> >
> > Logically, it shouldn't matter, but assuming that for tests is not the
> > best practice.
>
> Agreed.
>
> > Thus, reducing the test matrix for a new feature by removing a
> > configuration that I consider misguided feels like a good idea, but I am
> > also feeling that my opinion on this matter is very biased, so I'd be
> > happy if you can provide a more objective decision about the
> > restrictions.
>
> Heh, nobody is objective :). We are all biased by our past experiences.
> Which is why discussing things together is so beneficial. I agree about
> the benefit of simplier testing and that hardly anybody is going to miss
> the functionality, what still isn't 100% clear to me how the restrictions
> would be implemented in the API. So user creates fanotify_group() with
> control fd feature flag. Now will he have to specify in advance whether the
> group is for permission events or not? Already when creating the group
> (i.e., another feature flag? Or infered from group priority?)? Or when
> creating queue fd? Or will the group switch based on marks being placed to
> it? I think explicit selection is less confusing than implicit by placed
> marks. Using group priority looks appealing at first sight but currently
> group priorities also have implications about order in which we deliver
> events (both async and permission) to groups and it would be also somewhat
> confusing that FAN_CLASS_PRE_CONTENT groups may still likely use
> FAN_OPEN_PERM event. So maybe it's not that great fit. Hum... Or we can
> have a single feature flag (good name needed!) that will create group with
> control fd *and* restricted to permission events only. That actually seems
> like the least confusing option to me now.

I was thinking inferred from group priority along with control fd feature flag,
but I agree that being more explicit is better.

How about tying the name to your FAN_RETRY patch
and including its functionality along with the control fd:

FAN_RESTARTABLE_EVENTS
       Events for fanotify groups initialized with this flag will be restarted
       if the group file descriptor is closed after reading the
permission events
       and before responding to the event.
       This means that if an event handler program gets stuck, a new event
       handler can be started before killing the old event handler,
without missing
       the permission event.
       The file descriptor returned from fanotify_init() cannot be
used to read and
       respond to permission events, only to configure marks.
       The IOC_FAN_OPEN_QUEUE_FD ioctl is required to acquire a secondary
       event queue file descriptor.  This event queue file descriptor is used to
       read the permission events and respond to them.
       When the queue file descriptor is closed, events that were read from the
       queue but not responded to, are returned to the queue and can be handled
       by another instance of the program that opens a new queue file
descriptor.
       The use of either FAN_CLASS_CONTENT or FAN_CLASS_PRE_CONTENT
       is required along with this flag.
       Only permission events and pre-content events are allowed to be
set in mark
       masks of a group that was initialized with this flag.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] fanotify: support custom default close response
  2025-07-16 10:31                                 ` Amir Goldstein
@ 2025-07-21 12:56                                   ` Jan Kara
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Kara @ 2025-07-21 12:56 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Jan Kara, Ibrahim Jirdeh, josef, lesha, linux-fsdevel, sargun

On Wed 16-07-25 12:31:41, Amir Goldstein wrote:
> On Wed, Jul 16, 2025 at 10:55 AM Jan Kara <jack@suse.cz> wrote:
> >
> > On Tue 15-07-25 16:50:00, Amir Goldstein wrote:
> > > On Tue, Jul 15, 2025 at 2:11 PM Jan Kara <jack@suse.cz> wrote:
> > > > On Mon 14-07-25 21:59:22, Amir Goldstein wrote:
> > > > > On Mon, Jul 14, 2025 at 7:25 PM Jan Kara <jack@suse.cz> wrote:
> > > > > > > I don't think there is much to lose from this retry behavior.
> > > > > > > The only reason we want to opt-in for it is to avoid surprises of
> > > > > > > behavior change in existing deployments.
> > > > > > >
> > > > > > > While we could have FAN_RETRY_UNANSWERED as an
> > > > > > > independent feature without a handover ioctl,
> > > > > > > In order to avoid test matrix bloat, at least for a start (we can relax later),
> > > > > > > I don't think that we should allow it as an independent feature
> > > > > > > and especially not for legacy modes (i.e. for Anti-Virus) unless there
> > > > > > > is a concrete user requesting/testing these use cases.
> > > > > >
> > > > > > With queue-fd design I agree there's no reason not to have the "resend
> > > > > > pending events" behavior from the start.
> > > > > >
> > > > > > > Going on about feature dependency.
> > > > > > >
> > > > > > > Practically, a handover ioctl is useless without
> > > > > > > FAN_REPORT_RESPONSE_ID, so for sure we need to require
> > > > > > > FAN_REPORT_RESPONSE_ID for the handover ioctl feature.
> > > > > > >
> > > > > > > Because I do not see an immediate use case for
> > > > > > > FAN_REPORT_RESPONSE_ID without handover,
> > > > > > > I would start by only allowing them together and consider relaxing
> > > > > > > later if such a use case is found.
> > > > > >
> > > > > > We can tie them together but I think queue-fd design doesn't require
> > > > > > FAN_REPORT_RESPONSE_ID. Since we resend events anyway, we'd generate new
> > > > > > fds for events as well and things would just work AFAICT.
> > > > > >
> > > > >
> > > > > Right. hmm.
> > > > > I'd still like to consider the opportunity of the new-ish API for
> > > > > deprecating some old legacy API baggage.
> > > > >
> > > > > For example: do you consider the fact that async events are mixed
> > > > > together with permission/pre-content events in the same queue
> > > > > an historic mistake or a feature?
> > > >
> > > > So far I do consider it a feature although not a particularly useful one.
> > > > Given fanotify doesn't guarantee ordering of events and async events can
> > > > arbitrarily skip over the permission events there's no substantial
> > > > difference to having two notification groups.
> > >
> > > I am not sure I understand your claim.
> > > It sounds like you are arguing for my case, because mixing
> > > mergeable async events and non-mergeable permission events
> > > in the same queue can be very confusing.
> > > Therefore, I consider it an anti-feature.
> > > Users can get the same functionality from having two groups,
> > > with much more sane semantics.
> >
> > I'd say with the same semantics but less expectations about possibly nicer
> > semantics :). But we agree two groups are a cleaner way to achieve the
> > same and give userspace more flexibility with handling the events at the
> > same time.
> >
> > > > > I'm tempted, as we split the control fd from the queue fd to limit
> > > > > a queue to events of the same "type".
> > > > > Maybe an O_RDONLY async queue fd
> > > > > or an O_RDWR permission events queue fd
> > > > > or only allow the latter with the new API?
> > > >
> > > > There's value in restricting unneeded functionality and there's also value
> > > > in having features not influencing one another so the balance has to be
> > > > found :).
> > >
> > > <nod>
> > >
> > > > So far I don't find that disallowing async events with queue fd
> > > > or restricting fd mode for queue fd would simplify our life in a
> > > > significant way but maybe I'm missing something.
> > > >
> > >
> > > It only simplifies our life if we are going to be honest about test coverage.
> > > When we return a pending permission events to the head of the queue
> > > when queue fd is closed, does it matter if the queue has async events
> > > or other permission events or a mix of them?
> > >
> > > Logically, it shouldn't matter, but assuming that for tests is not the
> > > best practice.
> >
> > Agreed.
> >
> > > Thus, reducing the test matrix for a new feature by removing a
> > > configuration that I consider misguided feels like a good idea, but I am
> > > also feeling that my opinion on this matter is very biased, so I'd be
> > > happy if you can provide a more objective decision about the
> > > restrictions.
> >
> > Heh, nobody is objective :). We are all biased by our past experiences.
> > Which is why discussing things together is so beneficial. I agree about
> > the benefit of simplier testing and that hardly anybody is going to miss
> > the functionality, what still isn't 100% clear to me how the restrictions
> > would be implemented in the API. So user creates fanotify_group() with
> > control fd feature flag. Now will he have to specify in advance whether the
> > group is for permission events or not? Already when creating the group
> > (i.e., another feature flag? Or infered from group priority?)? Or when
> > creating queue fd? Or will the group switch based on marks being placed to
> > it? I think explicit selection is less confusing than implicit by placed
> > marks. Using group priority looks appealing at first sight but currently
> > group priorities also have implications about order in which we deliver
> > events (both async and permission) to groups and it would be also somewhat
> > confusing that FAN_CLASS_PRE_CONTENT groups may still likely use
> > FAN_OPEN_PERM event. So maybe it's not that great fit. Hum... Or we can
> > have a single feature flag (good name needed!) that will create group with
> > control fd *and* restricted to permission events only. That actually seems
> > like the least confusing option to me now.
> 
> I was thinking inferred from group priority along with control fd feature flag,
> but I agree that being more explicit is better.
> 
> How about tying the name to your FAN_RETRY patch
> and including its functionality along with the control fd:
> 
> FAN_RESTARTABLE_EVENTS
>        Events for fanotify groups initialized with this flag will be restarted
>        if the group file descriptor is closed after reading the
> permission events
>        and before responding to the event.
>        This means that if an event handler program gets stuck, a new event
>        handler can be started before killing the old event handler,
> without missing
>        the permission event.
>        The file descriptor returned from fanotify_init() cannot be
> used to read and
>        respond to permission events, only to configure marks.
>        The IOC_FAN_OPEN_QUEUE_FD ioctl is required to acquire a secondary
>        event queue file descriptor.  This event queue file descriptor is used to
>        read the permission events and respond to them.
>        When the queue file descriptor is closed, events that were read from the
>        queue but not responded to, are returned to the queue and can be handled
>        by another instance of the program that opens a new queue file
> descriptor.
>        The use of either FAN_CLASS_CONTENT or FAN_CLASS_PRE_CONTENT
>        is required along with this flag.
>        Only permission events and pre-content events are allowed to be
> set in mark
>        masks of a group that was initialized with this flag.

Sounds very nice to me.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2025-07-21 12:56 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-23 19:25 [PATCH] fanotify: support custom default close response Ibrahim Jirdeh
2025-06-24  6:30 ` Amir Goldstein
2025-06-26 21:19   ` Ibrahim Jirdeh
2025-06-30 15:36   ` Jan Kara
2025-06-30 15:56     ` Amir Goldstein
2025-07-02 16:15       ` Jan Kara
2025-07-02 16:56         ` Amir Goldstein
2025-07-03  7:08           ` Ibrahim Jirdeh
2025-07-03  8:27             ` Amir Goldstein
2025-07-03 14:43               ` Jan Kara
2025-07-03 16:09                 ` Amir Goldstein
2025-07-11 22:30                   ` Ibrahim Jirdeh
2025-07-12  8:08                     ` Amir Goldstein
2025-07-14  7:01                       ` Ibrahim Jirdeh
2025-07-14 15:59                         ` Amir Goldstein
2025-07-14 16:57                           ` Ibrahim Jirdeh
2025-07-14 17:25                       ` Jan Kara
2025-07-14 19:59                         ` Amir Goldstein
2025-07-15 12:11                           ` Jan Kara
2025-07-15 14:50                             ` Amir Goldstein
2025-07-16  8:55                               ` Jan Kara
2025-07-16 10:31                                 ` Amir Goldstein
2025-07-21 12:56                                   ` Jan Kara
2025-06-24 11:11 ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).