* [PATCH 1/3] kernfs: allow passing fsnotify event types
2026-02-10 0:37 [PATCH 0/3] kernfs: Add inotify IN_DELETE_SELF, IN_IGNORED support for files T.J. Mercier
@ 2026-02-10 0:37 ` T.J. Mercier
2026-02-10 0:38 ` [PATCH 2/3] kernfs: send IN_DELETE_SELF and IN_IGNORED on file deletion T.J. Mercier
2026-02-10 0:38 ` [PATCH 3/3] selftests: memcg: Add tests IN_DELETE_SELF and IN_IGNORED on memory.events T.J. Mercier
2 siblings, 0 replies; 6+ messages in thread
From: T.J. Mercier @ 2026-02-10 0:37 UTC (permalink / raw)
To: gregkh, tj, driver-core, linux-kernel, cgroups, shuah,
linux-kselftest
Cc: T.J. Mercier
The kernfs_notify function is hardcoded to only issue FS_MODIFY events
since that is the only current use case. Allow for supporting other
events by adding a notify_event field to kernfs_elem_attr. The
limitation of only one queued event per kernfs_node continues to exist
as a consequence of the design of the kernfs_notify_list. The new
notify_event field is protected by the same kernfs_notify_lock as the
existing notify_next field.
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
fs/kernfs/file.c | 8 ++++++--
include/linux/kernfs.h | 1 +
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 9adf36e6364b..e978284ff983 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -914,6 +914,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
struct kernfs_node *kn;
struct kernfs_super_info *info;
struct kernfs_root *root;
+ u32 notify_event;
repeat:
/* pop one off the notify_list */
spin_lock_irq(&kernfs_notify_lock);
@@ -924,6 +925,8 @@ static void kernfs_notify_workfn(struct work_struct *work)
}
kernfs_notify_list = kn->attr.notify_next;
kn->attr.notify_next = NULL;
+ notify_event = kn->attr.notify_event;
+ kn->attr.notify_event = 0;
spin_unlock_irq(&kernfs_notify_lock);
root = kernfs_root(kn);
@@ -954,7 +957,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
if (parent) {
p_inode = ilookup(info->sb, kernfs_ino(parent));
if (p_inode) {
- fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
+ fsnotify(notify_event | FS_EVENT_ON_CHILD,
inode, FSNOTIFY_EVENT_INODE,
p_inode, &name, inode, 0);
iput(p_inode);
@@ -964,7 +967,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
}
if (!p_inode)
- fsnotify_inode(inode, FS_MODIFY);
+ fsnotify_inode(inode, notify_event);
iput(inode);
}
@@ -1005,6 +1008,7 @@ void kernfs_notify(struct kernfs_node *kn)
if (!kn->attr.notify_next) {
kernfs_get(kn);
kn->attr.notify_next = kernfs_notify_list;
+ kn->attr.notify_event = FS_MODIFY;
kernfs_notify_list = kn;
schedule_work(&kernfs_notify_work);
}
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index b5a5f32fdfd1..1762b32c1a8e 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -181,6 +181,7 @@ struct kernfs_elem_attr {
struct kernfs_open_node __rcu *open;
loff_t size;
struct kernfs_node *notify_next; /* for kernfs_notify() */
+ u32 notify_event; /* for kernfs_notify() */
};
/*
--
2.53.0.rc2.204.g2597b5adb4-goog
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 2/3] kernfs: send IN_DELETE_SELF and IN_IGNORED on file deletion
2026-02-10 0:37 [PATCH 0/3] kernfs: Add inotify IN_DELETE_SELF, IN_IGNORED support for files T.J. Mercier
2026-02-10 0:37 ` [PATCH 1/3] kernfs: allow passing fsnotify event types T.J. Mercier
@ 2026-02-10 0:38 ` T.J. Mercier
2026-02-10 22:42 ` Tejun Heo
2026-02-10 0:38 ` [PATCH 3/3] selftests: memcg: Add tests IN_DELETE_SELF and IN_IGNORED on memory.events T.J. Mercier
2 siblings, 1 reply; 6+ messages in thread
From: T.J. Mercier @ 2026-02-10 0:38 UTC (permalink / raw)
To: gregkh, tj, driver-core, linux-kernel, cgroups, shuah,
linux-kselftest
Cc: T.J. Mercier
Currently some kernfs files (e.g. cgroup.events, memory.events) support
inotify watches for IN_MODIFY, but unlike with regular filesystems, they
do not receive IN_DELETE_SELF or IN_IGNORED events when they are
removed.
This creates a problem for processes monitoring cgroups. For example, a
service monitoring memory.events for memory.high breaches needs to know
when a cgroup is removed to clean up its state. Where it's known that a
cgroup is removed when all processes die, without IN_DELETE_SELF the
service must resort to inefficient workarounds such as:
1. Periodically scanning procfs to detect process death (wastes CPU and
is susceptible to PID reuse).
2. Placing an additional IN_DELETE watch on the parent directory
(wastes resources managing double the watches).
3. Holding a pidfd for every monitored cgroup (can exhaust file
descriptors).
This patch enables kernfs to send IN_DELETE_SELF and IN_IGNORED events.
This allows applications to rely on a single existing watch on the file
of interest (e.g. memory.events) to receive notifications for both
modifications and the eventual removal of the file, as well as automatic
watch descriptor cleanup, simplifying userspace logic and improving
resource efficiency.
Implementation details:
The kernfs notification worker is updated to handle file deletion.
fsnotify handles sending MODIFY events to both a watched file and its
parent, but it does not handle sending a DELETE event to the parent and
a DELETE_SELF event to the watched file in a single call. Therefore,
separate fsnotify calls are made: one for the parent (DELETE) and one
for the child (DELETE_SELF), while retaining the optimized single call
for MODIFY events.
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
fs/kernfs/dir.c | 21 +++++++++++++++++++++
fs/kernfs/file.c | 29 +++++++++++++++++++++++------
fs/kernfs/kernfs-internal.h | 3 +++
3 files changed, 47 insertions(+), 6 deletions(-)
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 29baeeb97871..74a4c347b78a 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -9,6 +9,7 @@
#include <linux/sched.h>
#include <linux/fs.h>
+#include <linux/fsnotify_backend.h>
#include <linux/namei.h>
#include <linux/idr.h>
#include <linux/slab.h>
@@ -1471,6 +1472,23 @@ void kernfs_show(struct kernfs_node *kn, bool show)
up_write(&root->kernfs_rwsem);
}
+/*
+ * Queue an FS_DELETE notification for @kn on the shared kernfs notify list.
+ *
+ * Everything below runs under kernfs_notify_lock, taken with a scoped
+ * irqsave guard. The event type is stored unconditionally, so a
+ * notification already queued for @kn has its event replaced by FS_DELETE.
+ * If @kn is not queued yet, a reference is taken with kernfs_get(), @kn is
+ * pushed onto the head of kernfs_notify_list, and a work item that runs
+ * kernfs_notify_workfn() is scheduled.
+ */
+static void kernfs_notify_file_deleted(struct kernfs_node *kn)
+{
+ static DECLARE_WORK(kernfs_notify_deleted_work,
+ kernfs_notify_workfn);
+
+ guard(spinlock_irqsave)(&kernfs_notify_lock);
+ /* may overwrite already pending FS_MODIFY events */
+ kn->attr.notify_event = FS_DELETE;
+
+ /* a NULL notify_next means kn is not currently on the notify list */
+ if (!kn->attr.notify_next) {
+ kernfs_get(kn);
+ kn->attr.notify_next = kernfs_notify_list;
+ kernfs_notify_list = kn;
+ schedule_work(&kernfs_notify_deleted_work);
+ }
+}
+
static void __kernfs_remove(struct kernfs_node *kn)
{
struct kernfs_node *pos, *parent;
@@ -1520,6 +1538,9 @@ static void __kernfs_remove(struct kernfs_node *kn)
struct kernfs_iattrs *ps_iattr =
parent ? parent->iattr : NULL;
+ if (kernfs_type(kn) == KERNFS_FILE)
+ kernfs_notify_file_deleted(pos);
+
/* update timestamps on the parent */
down_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e978284ff983..3e813b09ab05 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -37,8 +37,8 @@ struct kernfs_open_node {
*/
#define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list)
-static DEFINE_SPINLOCK(kernfs_notify_lock);
-static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
+DEFINE_SPINLOCK(kernfs_notify_lock);
+struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn)
{
@@ -909,12 +909,21 @@ static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence)
return ret;
}
-static void kernfs_notify_workfn(struct work_struct *work)
+/*
+ * Map the event reported to the parent directory onto the event reported on
+ * the node's own inode: FS_DELETE becomes FS_DELETE_SELF, every other event
+ * is passed through unchanged.
+ *
+ * The mask is taken and returned as u32 to match the notify_event and
+ * self_event variables of kernfs_notify_workfn(), which hold it as u32.
+ */
+static u32 fsnotify_self_event(u32 event)
+{
+	if (event == FS_DELETE)
+		return FS_DELETE_SELF;
+
+	return event;
+}
+
+void kernfs_notify_workfn(struct work_struct *work)
{
struct kernfs_node *kn;
struct kernfs_super_info *info;
struct kernfs_root *root;
u32 notify_event;
+ u32 self_event;
repeat:
/* pop one off the notify_list */
spin_lock_irq(&kernfs_notify_lock);
@@ -929,6 +938,8 @@ static void kernfs_notify_workfn(struct work_struct *work)
kn->attr.notify_event = 0;
spin_unlock_irq(&kernfs_notify_lock);
+ self_event = fsnotify_self_event(notify_event);
+
root = kernfs_root(kn);
/* kick fsnotify */
@@ -959,15 +970,21 @@ static void kernfs_notify_workfn(struct work_struct *work)
if (p_inode) {
fsnotify(notify_event | FS_EVENT_ON_CHILD,
inode, FSNOTIFY_EVENT_INODE,
- p_inode, &name, inode, 0);
+ p_inode, &name,
+ (notify_event == self_event) ?
+ inode : NULL, 0);
iput(p_inode);
}
kernfs_put(parent);
}
- if (!p_inode)
- fsnotify_inode(inode, notify_event);
+ if (!p_inode || self_event != notify_event)
+ fsnotify_inode(inode, self_event);
+
+ /* For IN_IGNORED, and automatic watch descriptor removal */
+ if (self_event == FS_DELETE_SELF)
+ fsnotify_inode_delete(inode);
iput(inode);
}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 6061b6f70d2a..cf4b21f4f3b6 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -199,6 +199,8 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
* file.c
*/
extern const struct file_operations kernfs_file_fops;
+extern struct kernfs_node *kernfs_notify_list;
+extern void kernfs_notify_workfn(struct work_struct *work);
bool kernfs_should_drain_open_files(struct kernfs_node *kn);
void kernfs_drain_open_files(struct kernfs_node *kn);
@@ -212,4 +214,5 @@ extern const struct inode_operations kernfs_symlink_iops;
* kernfs locks
*/
extern struct kernfs_global_locks *kernfs_locks;
+extern spinlock_t kernfs_notify_lock;
#endif /* __KERNFS_INTERNAL_H */
--
2.53.0.rc2.204.g2597b5adb4-goog
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH 2/3] kernfs: send IN_DELETE_SELF and IN_IGNORED on file deletion
2026-02-10 0:38 ` [PATCH 2/3] kernfs: send IN_DELETE_SELF and IN_IGNORED on file deletion T.J. Mercier
@ 2026-02-10 22:42 ` Tejun Heo
2026-02-10 22:48 ` T.J. Mercier
0 siblings, 1 reply; 6+ messages in thread
From: Tejun Heo @ 2026-02-10 22:42 UTC (permalink / raw)
To: T.J. Mercier
Cc: gregkh, driver-core, linux-kernel, cgroups, shuah,
linux-kselftest
On Mon, Feb 09, 2026 at 04:38:00PM -0800, T.J. Mercier wrote:
...
> static void __kernfs_remove(struct kernfs_node *kn)
> {
> struct kernfs_node *pos, *parent;
> @@ -1520,6 +1538,9 @@ static void __kernfs_remove(struct kernfs_node *kn)
> struct kernfs_iattrs *ps_iattr =
> parent ? parent->iattr : NULL;
>
> + if (kernfs_type(kn) == KERNFS_FILE)
kernfs_type(pos)?
> + kernfs_notify_file_deleted(pos);
> +
...
> -static void kernfs_notify_workfn(struct work_struct *work)
> +static int fsnotify_self_event(int event)
> +{
> + if (event == FS_DELETE)
> + return FS_DELETE_SELF;
> +
> + return event;
> +}
> +
> +void kernfs_notify_workfn(struct work_struct *work)
> {
> struct kernfs_node *kn;
> struct kernfs_super_info *info;
> struct kernfs_root *root;
> u32 notify_event;
> + u32 self_event;
> repeat:
> /* pop one off the notify_list */
> spin_lock_irq(&kernfs_notify_lock);
> @@ -929,6 +938,8 @@ static void kernfs_notify_workfn(struct work_struct *work)
> kn->attr.notify_event = 0;
> spin_unlock_irq(&kernfs_notify_lock);
>
> + self_event = fsnotify_self_event(notify_event);
Maybe just inline the conversion?
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH 2/3] kernfs: send IN_DELETE_SELF and IN_IGNORED on file deletion
2026-02-10 22:42 ` Tejun Heo
@ 2026-02-10 22:48 ` T.J. Mercier
0 siblings, 0 replies; 6+ messages in thread
From: T.J. Mercier @ 2026-02-10 22:48 UTC (permalink / raw)
To: Tejun Heo
Cc: gregkh, driver-core, linux-kernel, cgroups, shuah,
linux-kselftest
On Tue, Feb 10, 2026 at 2:42 PM Tejun Heo <tj@kernel.org> wrote:
>
> On Mon, Feb 09, 2026 at 04:38:00PM -0800, T.J. Mercier wrote:
> ...
> > static void __kernfs_remove(struct kernfs_node *kn)
> > {
> > struct kernfs_node *pos, *parent;
> > @@ -1520,6 +1538,9 @@ static void __kernfs_remove(struct kernfs_node *kn)
> > struct kernfs_iattrs *ps_iattr =
> > parent ? parent->iattr : NULL;
> >
> > + if (kernfs_type(kn) == KERNFS_FILE)
>
> kernfs_type(pos)?
Oh, yes you are right. Thanks.
> > + kernfs_notify_file_deleted(pos);
> > +
> ...
> > -static void kernfs_notify_workfn(struct work_struct *work)
> > +static int fsnotify_self_event(int event)
> > +{
> > + if (event == FS_DELETE)
> > + return FS_DELETE_SELF;
> > +
> > + return event;
> > +}
> > +
> > +void kernfs_notify_workfn(struct work_struct *work)
> > {
> > struct kernfs_node *kn;
> > struct kernfs_super_info *info;
> > struct kernfs_root *root;
> > u32 notify_event;
> > + u32 self_event;
> > repeat:
> > /* pop one off the notify_list */
> > spin_lock_irq(&kernfs_notify_lock);
> > @@ -929,6 +938,8 @@ static void kernfs_notify_workfn(struct work_struct *work)
> > kn->attr.notify_event = 0;
> > spin_unlock_irq(&kernfs_notify_lock);
> >
> > + self_event = fsnotify_self_event(notify_event);
>
> Maybe just inline the conversion?
Sure, sgtm. I figured the named function was a bit more
self-documenting, but it's just as easy to add checks for FS_DELETE
where self_event is used.
> Thanks.
>
> --
> tejun
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 3/3] selftests: memcg: Add tests IN_DELETE_SELF and IN_IGNORED on memory.events
2026-02-10 0:37 [PATCH 0/3] kernfs: Add inotify IN_DELETE_SELF, IN_IGNORED support for files T.J. Mercier
2026-02-10 0:37 ` [PATCH 1/3] kernfs: allow passing fsnotify event types T.J. Mercier
2026-02-10 0:38 ` [PATCH 2/3] kernfs: send IN_DELETE_SELF and IN_IGNORED on file deletion T.J. Mercier
@ 2026-02-10 0:38 ` T.J. Mercier
2 siblings, 0 replies; 6+ messages in thread
From: T.J. Mercier @ 2026-02-10 0:38 UTC (permalink / raw)
To: gregkh, tj, driver-core, linux-kernel, cgroups, shuah,
linux-kselftest
Cc: T.J. Mercier
Add two new tests that verify inotify events are sent when memcg files
are removed.
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
.../selftests/cgroup/test_memcontrol.c | 126 ++++++++++++++++++
1 file changed, 126 insertions(+)
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 4e1647568c5b..25a495347f7c 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -10,6 +10,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
+#include <sys/inotify.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <arpa/inet.h>
@@ -1625,6 +1626,129 @@ static int test_memcg_oom_group_score_events(const char *root)
return ret;
}
+/*
+ * Read one event from @inotify_fd and check it against the expected values.
+ *
+ * Exactly sizeof(struct inotify_event) bytes are requested, so this only
+ * works for events that carry no trailing name.
+ *
+ * Returns 0 when both the event mask and the watch descriptor match.
+ * Returns -1 on a failed or short read, or on a mismatch (which is also
+ * reported on stderr).
+ */
+static int read_event(int inotify_fd, int expected_event, int expected_wd)
+{
+	struct inotify_event event;
+	ssize_t len;
+
+	len = read(inotify_fd, &event, sizeof(event));
+	if (len < (ssize_t)sizeof(event))
+		return -1;
+
+	/* event.mask is an unsigned bit mask: compare and print it as one */
+	if (event.mask != (uint32_t)expected_event || event.wd != expected_wd) {
+		fprintf(stderr,
+			"event does not match expected values: mask %#x (expected %#x) wd %d (expected %d)\n",
+			event.mask, (uint32_t)expected_event, event.wd, expected_wd);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Watch memory.events of a child cgroup for IN_DELETE_SELF, write "-memory"
+ * to the parent's cgroup.subtree_control, and expect IN_DELETE_SELF followed
+ * by IN_IGNORED on the watch.
+ *
+ * Returns KSFT_PASS when both events arrive in that order, KSFT_FAIL
+ * otherwise.
+ */
+static int test_memcg_inotify_delete_file(const char *root)
+{
+	int ret = KSFT_FAIL;
+	/* initialised so that the cleanup path is safe after an early failure */
+	char *memcg, *child_memcg = NULL;
+	int fd = -1, wd;
+
+	memcg = cg_name(root, "memcg_test_0");
+
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_write(memcg, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	child_memcg = cg_name(memcg, "child");
+	if (!child_memcg)
+		goto cleanup;
+
+	if (cg_create(child_memcg))
+		goto cleanup;
+
+	fd = inotify_init1(0);
+	if (fd == -1)
+		goto cleanup;
+
+	/*
+	 * NOTE(review): the path returned by cg_control() is never released;
+	 * if the helper heap-allocates it, this leaks - confirm and free it.
+	 */
+	wd = inotify_add_watch(fd, cg_control(child_memcg, "memory.events"), IN_DELETE_SELF);
+	if (wd == -1)
+		goto cleanup;
+
+	/*
+	 * The reads below block, so bail out if the controller could not be
+	 * disabled instead of waiting for events that will never arrive.
+	 */
+	if (cg_write(memcg, "cgroup.subtree_control", "-memory"))
+		goto cleanup;
+
+	if (read_event(fd, IN_DELETE_SELF, wd))
+		goto cleanup;
+
+	if (read_event(fd, IN_IGNORED, wd))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (fd >= 0)
+		close(fd);
+	if (child_memcg)
+		cg_destroy(child_memcg);
+	free(child_memcg);
+	if (memcg)
+		cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+/*
+ * Watch memory.events of a cgroup for IN_DELETE_SELF, destroy the cgroup
+ * with cg_destroy(), and expect IN_DELETE_SELF followed by IN_IGNORED on the
+ * watch.
+ *
+ * Returns KSFT_PASS when both events arrive in that order, KSFT_FAIL
+ * otherwise.
+ */
+static int test_memcg_inotify_delete_rmdir(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	/* initialised so that the cleanup path is safe after an early failure */
+	int fd = -1, wd;
+
+	memcg = cg_name(root, "memcg_test_0");
+
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	fd = inotify_init1(0);
+	if (fd == -1)
+		goto cleanup;
+
+	/*
+	 * NOTE(review): the path returned by cg_control() is never released;
+	 * if the helper heap-allocates it, this leaks - confirm and free it.
+	 */
+	wd = inotify_add_watch(fd, cg_control(memcg, "memory.events"), IN_DELETE_SELF);
+	if (wd == -1)
+		goto cleanup;
+
+	if (cg_destroy(memcg))
+		goto cleanup;
+	/* already destroyed: clear the pointer so cleanup does not retry */
+	free(memcg);
+	memcg = NULL;
+
+	if (read_event(fd, IN_DELETE_SELF, wd))
+		goto cleanup;
+
+	if (read_event(fd, IN_IGNORED, wd))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (fd >= 0)
+		close(fd);
+	if (memcg)
+		cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
#define T(x) { x, #x }
struct memcg_test {
int (*fn)(const char *root);
@@ -1644,6 +1768,8 @@ struct memcg_test {
T(test_memcg_oom_group_leaf_events),
T(test_memcg_oom_group_parent_events),
T(test_memcg_oom_group_score_events),
+ T(test_memcg_inotify_delete_file),
+ T(test_memcg_inotify_delete_rmdir),
};
#undef T
--
2.53.0.rc2.204.g2597b5adb4-goog
^ permalink raw reply related [flat|nested] 6+ messages in thread