* [PATCH v4 1/4] fsnotify: add mount notification infrastructure
2025-01-23 19:41 [PATCH v4 0/4] mount notification Miklos Szeredi
@ 2025-01-23 19:41 ` Miklos Szeredi
2025-01-23 19:41 ` [PATCH v4 2/4] fanotify: notify on mount attach and detach Miklos Szeredi
` (2 subsequent siblings)
3 siblings, 0 replies; 14+ messages in thread
From: Miklos Szeredi @ 2025-01-23 19:41 UTC (permalink / raw)
To: linux-fsdevel
Cc: Christian Brauner, Jan Kara, Amir Goldstein, Karel Zak,
Lennart Poettering, Ian Kent, Al Viro, linux-security-module,
Paul Moore
This is just the plumbing between the event source (fs/namespace.c) and the
event consumer (fanotify). In itself it does nothing.
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
fs/mount.h | 4 +++
fs/notify/fsnotify.c | 47 +++++++++++++++++++++++++++-----
fs/notify/fsnotify.h | 11 ++++++++
fs/notify/mark.c | 14 ++++++++--
include/linux/fsnotify.h | 20 ++++++++++++++
include/linux/fsnotify_backend.h | 40 ++++++++++++++++++++++++++-
6 files changed, 125 insertions(+), 11 deletions(-)
diff --git a/fs/mount.h b/fs/mount.h
index 179f690a0c72..33311ad81042 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -14,6 +14,10 @@ struct mnt_namespace {
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
u64 event;
+#ifdef CONFIG_FSNOTIFY
+ __u32 n_fsnotify_mask;
+ struct fsnotify_mark_connector __rcu *n_fsnotify_marks;
+#endif
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index f976949d2634..2b2c3fd907c7 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -28,6 +28,11 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
fsnotify_clear_marks_by_mount(mnt);
}
+void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{
+ fsnotify_clear_marks_by_mntns(mntns);
+}
+
/**
* fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
* @sb: superblock being unmounted.
@@ -402,7 +407,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
file_name, cookie, iter_info);
}
-static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
+static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp)
{
struct fsnotify_mark_connector *conn;
struct hlist_node *node = NULL;
@@ -520,14 +525,15 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
{
const struct path *path = fsnotify_data_path(data, data_type);
struct super_block *sb = fsnotify_data_sb(data, data_type);
- struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
+ const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type);
+ struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL;
struct fsnotify_iter_info iter_info = {};
struct mount *mnt = NULL;
struct inode *inode2 = NULL;
struct dentry *moved;
int inode2_type;
int ret = 0;
- __u32 test_mask, marks_mask;
+ __u32 test_mask, marks_mask = 0;
if (path)
mnt = real_mount(path->mnt);
@@ -560,17 +566,20 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
if ((!sbinfo || !sbinfo->sb_marks) &&
(!mnt || !mnt->mnt_fsnotify_marks) &&
(!inode || !inode->i_fsnotify_marks) &&
- (!inode2 || !inode2->i_fsnotify_marks))
+ (!inode2 || !inode2->i_fsnotify_marks) &&
+ (!mnt_data || !mnt_data->ns->n_fsnotify_marks))
return 0;
- marks_mask = READ_ONCE(sb->s_fsnotify_mask);
+ if (sb)
+ marks_mask |= READ_ONCE(sb->s_fsnotify_mask);
if (mnt)
marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask);
if (inode)
marks_mask |= READ_ONCE(inode->i_fsnotify_mask);
if (inode2)
marks_mask |= READ_ONCE(inode2->i_fsnotify_mask);
-
+ if (mnt_data)
+ marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask);
/*
* If this is a modify event we may need to clear some ignore masks.
@@ -600,6 +609,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
iter_info.marks[inode2_type] =
fsnotify_first_mark(&inode2->i_fsnotify_marks);
}
+ if (mnt_data) {
+ iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] =
+ fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks);
+ }
/*
* We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
@@ -623,11 +636,31 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
}
EXPORT_SYMBOL_GPL(fsnotify);
+void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ struct fsnotify_mnt data = {
+ .ns = ns,
+ .mnt_id = real_mount(mnt)->mnt_id_unique,
+ };
+
+ if (WARN_ON_ONCE(!ns))
+ return;
+
+ /*
+ * This is an optimization as well as making sure fsnotify_init() has
+ * been called.
+ */
+ if (!ns->n_fsnotify_marks)
+ return;
+
+ fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0);
+}
+
static __init int fsnotify_init(void)
{
int ret;
- BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 663759ed6fbc..5950c7a67f41 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -33,6 +33,12 @@ static inline struct super_block *fsnotify_conn_sb(
return conn->obj;
}
+static inline struct mnt_namespace *fsnotify_conn_mntns(
+ struct fsnotify_mark_connector *conn)
+{
+ return conn->obj;
+}
+
static inline struct super_block *fsnotify_object_sb(void *obj,
enum fsnotify_obj_type obj_type)
{
@@ -89,6 +95,11 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
fsnotify_destroy_marks(fsnotify_sb_marks(sb));
}
+static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns)
+{
+ fsnotify_destroy_marks(&mntns->n_fsnotify_marks);
+}
+
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
* about events that happen to its children.
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 4981439e6209..798340db69d7 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -107,6 +107,8 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj,
return &real_mount(obj)->mnt_fsnotify_marks;
case FSNOTIFY_OBJ_TYPE_SB:
return fsnotify_sb_marks(obj);
+ case FSNOTIFY_OBJ_TYPE_MNTNS:
+ return &((struct mnt_namespace *)obj)->n_fsnotify_marks;
default:
return NULL;
}
@@ -120,6 +122,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
+ else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS)
+ return &fsnotify_conn_mntns(conn)->n_fsnotify_mask;
return NULL;
}
@@ -346,12 +350,15 @@ static void *fsnotify_detach_connector_from_object(
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
+ } else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0;
}
rcu_assign_pointer(*connp, NULL);
conn->obj = NULL;
conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
- fsnotify_update_sb_watchers(sb, conn);
+ if (sb)
+ fsnotify_update_sb_watchers(sb, conn);
return inode;
}
@@ -724,7 +731,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
* Attach the sb info before attaching a connector to any object on sb.
* The sb info will remain attached as long as sb lives.
*/
- if (!fsnotify_sb_info(sb)) {
+ if (sb && !fsnotify_sb_info(sb)) {
err = fsnotify_attach_info_to_sb(sb);
if (err)
return err;
@@ -770,7 +777,8 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
/* mark should be the last entry. last is the current last entry */
hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
- fsnotify_update_sb_watchers(sb, conn);
+ if (sb)
+ fsnotify_update_sb_watchers(sb, conn);
/*
* Since connector is attached to object using cmpxchg() we are
* guaranteed that connector initialization is fully visible by anyone
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 278620e063ab..ea998551dd0d 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -255,6 +255,11 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt)
__fsnotify_vfsmount_delete(mnt);
}
+static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{
+ __fsnotify_mntns_delete(mntns);
+}
+
/*
* fsnotify_inoderemove - an inode is going away
*/
@@ -463,4 +468,19 @@ static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
NULL, NULL, NULL, 0);
}
+static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ fsnotify_mnt(FS_MNT_ATTACH, ns, mnt);
+}
+
+static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ fsnotify_mnt(FS_MNT_DETACH, ns, mnt);
+}
+
+static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ fsnotify_mnt(FS_MNT_MOVE, ns, mnt);
+}
+
#endif /* _LINUX_FS_NOTIFY_H */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 3ecf7768e577..6c3e3a4a7b10 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -56,6 +56,10 @@
#define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */
#define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */
+#define FS_MNT_ATTACH 0x01000000 /* Mount was attached */
+#define FS_MNT_DETACH 0x02000000 /* Mount was detached */
+#define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH)
+
/*
* Set on inode mark that cares about things that happen to its children.
* Always set for dnotify and inotify.
@@ -102,7 +106,7 @@
FS_EVENTS_POSS_ON_CHILD | \
FS_DELETE_SELF | FS_MOVE_SELF | \
FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
- FS_ERROR)
+ FS_ERROR | FS_MNT_ATTACH | FS_MNT_DETACH)
/* Extra flags that may be reported with event or control handling of events */
#define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT)
@@ -288,6 +292,7 @@ enum fsnotify_data_type {
FSNOTIFY_EVENT_PATH,
FSNOTIFY_EVENT_INODE,
FSNOTIFY_EVENT_DENTRY,
+ FSNOTIFY_EVENT_MNT,
FSNOTIFY_EVENT_ERROR,
};
@@ -297,6 +302,11 @@ struct fs_error_report {
struct super_block *sb;
};
+struct fsnotify_mnt {
+ const struct mnt_namespace *ns;
+ u64 mnt_id;
+};
+
static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
{
switch (data_type) {
@@ -354,6 +364,24 @@ static inline struct super_block *fsnotify_data_sb(const void *data,
}
}
+static inline const struct fsnotify_mnt *fsnotify_data_mnt(const void *data,
+ int data_type)
+{
+ switch (data_type) {
+ case FSNOTIFY_EVENT_MNT:
+ return data;
+ default:
+ return NULL;
+ }
+}
+
+static inline u64 fsnotify_data_mnt_id(const void *data, int data_type)
+{
+ const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type);
+
+ return mnt_data ? mnt_data->mnt_id : 0;
+}
+
static inline struct fs_error_report *fsnotify_data_error_report(
const void *data,
int data_type)
@@ -379,6 +407,7 @@ enum fsnotify_iter_type {
FSNOTIFY_ITER_TYPE_SB,
FSNOTIFY_ITER_TYPE_PARENT,
FSNOTIFY_ITER_TYPE_INODE2,
+ FSNOTIFY_ITER_TYPE_MNTNS,
FSNOTIFY_ITER_TYPE_COUNT
};
@@ -388,6 +417,7 @@ enum fsnotify_obj_type {
FSNOTIFY_OBJ_TYPE_INODE,
FSNOTIFY_OBJ_TYPE_VFSMOUNT,
FSNOTIFY_OBJ_TYPE_SB,
+ FSNOTIFY_OBJ_TYPE_MNTNS,
FSNOTIFY_OBJ_TYPE_COUNT,
FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT
};
@@ -572,8 +602,10 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data
extern void __fsnotify_inode_delete(struct inode *inode);
extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
extern void fsnotify_sb_delete(struct super_block *sb);
+extern void __fsnotify_mntns_delete(struct mnt_namespace *mntns);
extern void fsnotify_sb_free(struct super_block *sb);
extern u32 fsnotify_get_cookie(void);
+extern void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt);
static inline __u32 fsnotify_parent_needed_mask(__u32 mask)
{
@@ -879,6 +911,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
static inline void fsnotify_sb_delete(struct super_block *sb)
{}
+static inline void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{}
+
static inline void fsnotify_sb_free(struct super_block *sb)
{}
@@ -893,6 +928,9 @@ static inline u32 fsnotify_get_cookie(void)
static inline void fsnotify_unmount_inodes(struct super_block *sb)
{}
+static inline void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt)
+{}
+
#endif /* CONFIG_FSNOTIFY */
#endif /* __KERNEL __ */
--
2.47.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 2/4] fanotify: notify on mount attach and detach
2025-01-23 19:41 [PATCH v4 0/4] mount notification Miklos Szeredi
2025-01-23 19:41 ` [PATCH v4 1/4] fsnotify: add mount notification infrastructure Miklos Szeredi
@ 2025-01-23 19:41 ` Miklos Szeredi
2025-01-24 19:38 ` Paul Moore
2025-01-23 19:41 ` [PATCH v4 3/4] vfs: add notifications for " Miklos Szeredi
2025-01-23 19:41 ` [PATCH v4 4/4] vfs: add notifications for mount attribute change Miklos Szeredi
3 siblings, 1 reply; 14+ messages in thread
From: Miklos Szeredi @ 2025-01-23 19:41 UTC (permalink / raw)
To: linux-fsdevel
Cc: Christian Brauner, Jan Kara, Amir Goldstein, Karel Zak,
Lennart Poettering, Ian Kent, Al Viro, linux-security-module,
Paul Moore
Add notifications for attaching and detaching mounts. The following new
event masks are added:
FAN_MNT_ATTACH - Mount was attached
FAN_MNT_DETACH - Mount was detached
If a mount is moved, then the event is reported with (FAN_MNT_ATTACH |
FAN_MNT_DETACH).
These events add an info record of type FAN_EVENT_INFO_TYPE_MNT containing
these fields identifying the affected mounts:
__u64 mnt_id - the ID of the mount (see statmount(2))
FAN_REPORT_MNT must be supplied to fanotify_init() to receive these events
and no other type of event can be received with this report type.
Marks are added with FAN_MARK_MNTNS, which records the mount namespace from
an nsfs file (e.g. /proc/self/ns/mnt).
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
fs/mount.h | 2 +
fs/namespace.c | 14 +++--
fs/notify/fanotify/fanotify.c | 38 +++++++++++--
fs/notify/fanotify/fanotify.h | 18 +++++++
fs/notify/fanotify/fanotify_user.c | 86 +++++++++++++++++++++++++-----
fs/notify/fdinfo.c | 5 ++
include/linux/fanotify.h | 12 +++--
include/uapi/linux/fanotify.h | 10 ++++
security/selinux/hooks.c | 4 ++
9 files changed, 166 insertions(+), 23 deletions(-)
diff --git a/fs/mount.h b/fs/mount.h
index 33311ad81042..9689e7bf4501 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -174,3 +174,5 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);
}
+
+struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
diff --git a/fs/namespace.c b/fs/namespace.c
index eac057e56948..4d9072fd1263 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2101,16 +2101,24 @@ struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mntns, bool pre
}
}
+struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry)
+{
+ if (!is_mnt_ns_file(dentry))
+ return NULL;
+
+ return to_mnt_ns(get_proc_ns(dentry->d_inode));
+}
+
static bool mnt_ns_loop(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
* mount namespace loop?
*/
- struct mnt_namespace *mnt_ns;
- if (!is_mnt_ns_file(dentry))
+ struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry);
+
+ if (!mnt_ns)
return false;
- mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 24c7c5df4998..b1937f92f105 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
return fanotify_error_event_equal(FANOTIFY_EE(old),
FANOTIFY_EE(new));
+ case FANOTIFY_EVENT_TYPE_MNT:
+ return false;
default:
WARN_ON_ONCE(1);
}
@@ -303,7 +305,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);
- if (!fid_mode) {
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
+ if (data_type != FSNOTIFY_EVENT_MNT)
+ return 0;
+ } else if (!fid_mode) {
/* Do we have path to open a file descriptor? */
if (!path)
return 0;
@@ -548,6 +553,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
return &pevent->fae;
}
+static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp)
+{
+ struct fanotify_mnt_event *pevent;
+
+ pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp);
+ if (!pevent)
+ return NULL;
+
+ pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT;
+ pevent->mnt_id = mnt_id;
+
+ return &pevent->fae;
+}
+
static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
gfp_t gfp)
{
@@ -715,6 +734,7 @@ static struct fanotify_event *fanotify_alloc_event(
fid_mode);
struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
const struct path *path = fsnotify_data_path(data, data_type);
+ u64 mnt_id = fsnotify_data_mnt_id(data, data_type);
struct mem_cgroup *old_memcg;
struct dentry *moved = NULL;
struct inode *child = NULL;
@@ -810,8 +830,12 @@ static struct fanotify_event *fanotify_alloc_event(
moved, &hash, gfp);
} else if (fid_mode) {
event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
- } else {
+ } else if (path) {
event = fanotify_alloc_path_event(path, &hash, gfp);
+ } else if (mnt_id) {
+ event = fanotify_alloc_mnt_event(mnt_id, gfp);
+ } else {
+ WARN_ON_ONCE(1);
}
if (!event)
@@ -910,7 +934,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
- BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 23);
mask = fanotify_group_event_mask(group, iter_info, &match_mask,
mask, data, data_type, dir);
@@ -1011,6 +1035,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group,
mempool_free(fee, &group->fanotify_data.error_events_pool);
}
+static void fanotify_free_mnt_event(struct fanotify_event *event)
+{
+ kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event));
+}
+
static void fanotify_free_event(struct fsnotify_group *group,
struct fsnotify_event *fsn_event)
{
@@ -1037,6 +1066,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
fanotify_free_error_event(group, event);
break;
+ case FANOTIFY_EVENT_TYPE_MNT:
+ fanotify_free_mnt_event(event);
+ break;
default:
WARN_ON_ONCE(1);
}
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index e5ab33cae6a7..f1a7cbedc9e3 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_fid_event_cachep;
extern struct kmem_cache *fanotify_path_event_cachep;
extern struct kmem_cache *fanotify_perm_event_cachep;
+extern struct kmem_cache *fanotify_mnt_event_cachep;
/* Possible states of the permission event */
enum {
@@ -244,6 +245,7 @@ enum fanotify_event_type {
FANOTIFY_EVENT_TYPE_PATH_PERM,
FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
+ FANOTIFY_EVENT_TYPE_MNT,
__FANOTIFY_EVENT_TYPE_NUM
};
@@ -409,12 +411,23 @@ struct fanotify_path_event {
struct path path;
};
+struct fanotify_mnt_event {
+ struct fanotify_event fae;
+ u64 mnt_id;
+};
+
static inline struct fanotify_path_event *
FANOTIFY_PE(struct fanotify_event *event)
{
return container_of(event, struct fanotify_path_event, fae);
}
+static inline struct fanotify_mnt_event *
+FANOTIFY_ME(struct fanotify_event *event)
+{
+ return container_of(event, struct fanotify_mnt_event, fae);
+}
+
/*
* Structure for permission fanotify events. It gets allocated and freed in
* fanotify_handle_event() since we wait there for user response. When the
@@ -456,6 +469,11 @@ static inline bool fanotify_is_error_event(u32 mask)
return mask & FAN_FS_ERROR;
}
+static inline bool fanotify_is_mnt_event(u32 mask)
+{
+ return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
+}
+
static inline const struct path *fanotify_event_path(struct fanotify_event *event)
{
if (event->type == FANOTIFY_EVENT_TYPE_PATH)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 2d85c71717d6..da97eb01e2fa 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -114,6 +114,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init;
struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
+struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init;
#define FANOTIFY_EVENT_ALIGN 4
#define FANOTIFY_FID_INFO_HDR_LEN \
@@ -122,6 +123,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
sizeof(struct fanotify_event_info_pidfd)
#define FANOTIFY_ERROR_INFO_LEN \
(sizeof(struct fanotify_event_info_error))
+#define FANOTIFY_MNT_INFO_LEN \
+ (sizeof(struct fanotify_event_info_mnt))
static int fanotify_fid_info_len(int fh_len, int name_len)
{
@@ -183,6 +186,8 @@ static size_t fanotify_event_len(unsigned int info_mode,
fh_len = fanotify_event_object_fh_len(event);
event_len += fanotify_fid_info_len(fh_len, dot_len);
}
+ if (fanotify_is_mnt_event(event->mask))
+ event_len += FANOTIFY_MNT_INFO_LEN;
return event_len;
}
@@ -380,6 +385,25 @@ static int process_access_response(struct fsnotify_group *group,
return -ENOENT;
}
+static size_t copy_mnt_info_to_user(struct fanotify_event *event,
+ char __user *buf, int count)
+{
+ struct fanotify_event_info_mnt info = { };
+
+ info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT;
+ info.hdr.len = FANOTIFY_MNT_INFO_LEN;
+
+ if (WARN_ON(count < info.hdr.len))
+ return -EFAULT;
+
+ info.mnt_id = FANOTIFY_ME(event)->mnt_id;
+
+ if (copy_to_user(buf, &info, sizeof(info)))
+ return -EFAULT;
+
+ return info.hdr.len;
+}
+
static size_t copy_error_info_to_user(struct fanotify_event *event,
char __user *buf, int count)
{
@@ -642,6 +666,14 @@ static int copy_info_records_to_user(struct fanotify_event *event,
total_bytes += ret;
}
+ if (fanotify_is_mnt_event(event->mask)) {
+ ret = copy_mnt_info_to_user(event, buf, count);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
return total_bytes;
}
@@ -1446,6 +1478,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
return -EINVAL;
+ /* Don't allow mixing mnt events with inode events for now */
+ if (flags & FAN_REPORT_MNT) {
+ if (class != FAN_CLASS_NOTIF)
+ return -EINVAL;
+ if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR))
+ return -EINVAL;
+ }
+
if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
return -EINVAL;
@@ -1685,7 +1725,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
int dfd, const char __user *pathname)
{
struct inode *inode = NULL;
- struct vfsmount *mnt = NULL;
struct fsnotify_group *group;
struct path path;
struct fan_fsid __fsid, *fsid = NULL;
@@ -1718,6 +1757,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
case FAN_MARK_FILESYSTEM:
obj_type = FSNOTIFY_OBJ_TYPE_SB;
break;
+ case FAN_MARK_MNTNS:
+ obj_type = FSNOTIFY_OBJ_TYPE_MNTNS;
+ break;
default:
return -EINVAL;
}
@@ -1765,6 +1807,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EINVAL;
group = fd_file(f)->private_data;
+ /* Only report mount events on mnt namespace */
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
+ if (mask & ~FANOTIFY_MOUNT_EVENTS)
+ return -EINVAL;
+ if (mark_type != FAN_MARK_MNTNS)
+ return -EINVAL;
+ } else {
+ if (mask & FANOTIFY_MOUNT_EVENTS)
+ return -EINVAL;
+ if (mark_type == FAN_MARK_MNTNS)
+ return -EINVAL;
+ }
+
/*
* An unprivileged user is not allowed to setup mount nor filesystem
* marks. This also includes setting up such marks by a group that
@@ -1802,7 +1857,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* point.
*/
fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
- if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
+ if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) &&
(!fid_mode || mark_type == FAN_MARK_MOUNT))
return -EINVAL;
@@ -1848,17 +1903,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* inode held in place by reference to path; group by fget on fd */
- if (mark_type == FAN_MARK_INODE) {
+ if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = path.dentry->d_inode;
obj = inode;
- } else {
- mnt = path.mnt;
- if (mark_type == FAN_MARK_MOUNT)
- obj = mnt;
- else
- obj = mnt->mnt_sb;
+ } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
+ obj = path.mnt;
+ } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
+ obj = path.mnt->mnt_sb;
+ } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ obj = mnt_ns_from_dentry(path.dentry);
}
+ ret = -EINVAL;
+ if (!obj)
+ goto path_put_and_out;
+
/*
* If some other task has this inode open for write we should not add
* an ignore mask, unless that ignore mask is supposed to survive
@@ -1866,10 +1925,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
*/
if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
- ret = mnt ? -EINVAL : -EISDIR;
+ ret = !inode ? -EINVAL : -EISDIR;
/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
if (ignore == FAN_MARK_IGNORE &&
- (mnt || S_ISDIR(inode->i_mode)))
+ (!inode || S_ISDIR(inode->i_mode)))
goto path_put_and_out;
ret = 0;
@@ -1878,7 +1937,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
- if (mnt || !S_ISDIR(inode->i_mode)) {
+ if (!inode || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
umask = FAN_EVENT_ON_CHILD;
/*
@@ -1952,7 +2011,7 @@ static int __init fanotify_user_setup(void)
FANOTIFY_DEFAULT_MAX_USER_MARKS);
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
@@ -1965,6 +2024,7 @@ static int __init fanotify_user_setup(void)
fanotify_perm_event_cachep =
KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
}
+ fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC);
fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index e933f9c65d90..1161eabf11ee 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -121,6 +121,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
sb->s_dev, mflags, mark->mask, mark->ignore_mask);
+ } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector);
+
+ seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n",
+ mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask);
}
}
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 89ff45bd6f01..fc142be2542d 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -25,7 +25,7 @@
#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET)
-#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
+#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT)
/*
* fanotify_init() flags that require CAP_SYS_ADMIN.
@@ -38,7 +38,8 @@
FAN_REPORT_PIDFD | \
FAN_REPORT_FD_ERROR | \
FAN_UNLIMITED_QUEUE | \
- FAN_UNLIMITED_MARKS)
+ FAN_UNLIMITED_MARKS | \
+ FAN_REPORT_MNT)
/*
* fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
@@ -58,7 +59,7 @@
#define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV)
#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
- FAN_MARK_FILESYSTEM)
+ FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS)
#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
FAN_MARK_FLUSH)
@@ -99,10 +100,13 @@
/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
#define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR)
+#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH)
+
/* Events that user can request to be notified on */
#define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \
FANOTIFY_INODE_EVENTS | \
- FANOTIFY_ERROR_EVENTS)
+ FANOTIFY_ERROR_EVENTS | \
+ FANOTIFY_MOUNT_EVENTS)
/* Events that require a permission response from user */
#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 34f221d3a1b9..69340e483ae7 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -25,6 +25,8 @@
#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */
+#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
+#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
@@ -61,6 +63,7 @@
#define FAN_REPORT_NAME 0x00000800 /* Report events with name */
#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */
#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */
+#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
@@ -91,6 +94,7 @@
#define FAN_MARK_INODE 0x00000000
#define FAN_MARK_MOUNT 0x00000010
#define FAN_MARK_FILESYSTEM 0x00000100
+#define FAN_MARK_MNTNS 0x00000110
/*
* Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
@@ -143,6 +147,7 @@ struct fanotify_event_metadata {
#define FAN_EVENT_INFO_TYPE_DFID 3
#define FAN_EVENT_INFO_TYPE_PIDFD 4
#define FAN_EVENT_INFO_TYPE_ERROR 5
+#define FAN_EVENT_INFO_TYPE_MNT 6
/* Special info types for FAN_RENAME */
#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
@@ -189,6 +194,11 @@ struct fanotify_event_info_error {
__u32 error_count;
};
+struct fanotify_event_info_mnt {
+ struct fanotify_event_info_header hdr;
+ __u64 mnt_id;
+};
+
/*
* User space may need to record additional information about its decision.
* The extra information type records what kind of information is included.
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 171dd7fceac5..d2b3e60e2be9 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3395,6 +3395,10 @@ static int selinux_path_notify(const struct path *path, u64 mask,
case FSNOTIFY_OBJ_TYPE_INODE:
perm = FILE__WATCH;
break;
+ case FSNOTIFY_OBJ_TYPE_MNTNS:
+ /* FIXME: Is this correct??? */
+ perm = FILE__WATCH;
+ break;
default:
return -EINVAL;
}
--
2.47.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH v4 2/4] fanotify: notify on mount attach and detach
2025-01-23 19:41 ` [PATCH v4 2/4] fanotify: notify on mount attach and detach Miklos Szeredi
@ 2025-01-24 19:38 ` Paul Moore
2025-01-25 1:09 ` Russell Coker
2025-01-28 18:11 ` Daniel Burgener
0 siblings, 2 replies; 14+ messages in thread
From: Paul Moore @ 2025-01-24 19:38 UTC (permalink / raw)
To: Miklos Szeredi
Cc: linux-fsdevel, Christian Brauner, Jan Kara, Amir Goldstein,
Karel Zak, Lennart Poettering, Ian Kent, Al Viro,
linux-security-module, selinux, selinux-refpolicy
On Thu, Jan 23, 2025 at 2:41 PM Miklos Szeredi <mszeredi@redhat.com> wrote:
>
> Add notifications for attaching and detaching mounts.
Adding the SELinux list to my reply as things like this really should
go to the full list. For those of you seeing this for the first time,
the full patchset from Miklos can be found at the lore link below.
Policy folks, please read my comments at the bottom, there is a
question for you there.
https://lore.kernel.org/linux-security-module/20250123194108.1025273-1-mszeredi@redhat.com
> The following new event masks are added:
>
> FAN_MNT_ATTACH - Mount was attached
> FAN_MNT_DETACH - Mount was detached
>
> If a mount is moved, then the event is reported with (FAN_MNT_ATTACH |
> FAN_MNT_DETACH).
>
> These events add an info record of type FAN_EVENT_INFO_TYPE_MNT containing
> these fields identifying the affected mounts:
>
> __u64 mnt_id - the ID of the mount (see statmount(2))
>
> FAN_REPORT_MNT must be supplied to fanotify_init() to receive these events
> and no other type of event can be received with this report type.
>
> Marks are added with FAN_MARK_MNTNS, which records the mount namespace from
> an nsfs file (e.g. /proc/self/ns/mnt).
The policy folks can correct me, but to put some context on this
(pardon the pun), I believe most public SELinux policies label the
nsfs files based on the associated process, e.g. if sshd is running in
the sshd_t domain, the /proc/<sshd>/ns files will be labeled as
sshd_t.
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
> fs/mount.h | 2 +
> fs/namespace.c | 14 +++--
> fs/notify/fanotify/fanotify.c | 38 +++++++++++--
> fs/notify/fanotify/fanotify.h | 18 +++++++
> fs/notify/fanotify/fanotify_user.c | 86 +++++++++++++++++++++++++-----
> fs/notify/fdinfo.c | 5 ++
> include/linux/fanotify.h | 12 +++--
> include/uapi/linux/fanotify.h | 10 ++++
> security/selinux/hooks.c | 4 ++
> 9 files changed, 166 insertions(+), 23 deletions(-)
I'll refrain from trimming the original patch for easier viewing by
the SELinux folks.
> diff --git a/fs/mount.h b/fs/mount.h
> index 33311ad81042..9689e7bf4501 100644
> --- a/fs/mount.h
> +++ b/fs/mount.h
> @@ -174,3 +174,5 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
> {
> return container_of(ns, struct mnt_namespace, ns);
> }
> +
> +struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
> diff --git a/fs/namespace.c b/fs/namespace.c
> index eac057e56948..4d9072fd1263 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -2101,16 +2101,24 @@ struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mntns, bool pre
> }
> }
>
> +struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry)
> +{
> + if (!is_mnt_ns_file(dentry))
> + return NULL;
> +
> + return to_mnt_ns(get_proc_ns(dentry->d_inode));
> +}
> +
> static bool mnt_ns_loop(struct dentry *dentry)
> {
> /* Could bind mounting the mount namespace inode cause a
> * mount namespace loop?
> */
> - struct mnt_namespace *mnt_ns;
> - if (!is_mnt_ns_file(dentry))
> + struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry);
> +
> + if (!mnt_ns)
> return false;
>
> - mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
> return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
> }
>
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index 24c7c5df4998..b1937f92f105 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old,
> case FANOTIFY_EVENT_TYPE_FS_ERROR:
> return fanotify_error_event_equal(FANOTIFY_EE(old),
> FANOTIFY_EE(new));
> + case FANOTIFY_EVENT_TYPE_MNT:
> + return false;
> default:
> WARN_ON_ONCE(1);
> }
> @@ -303,7 +305,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
> pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
> __func__, iter_info->report_mask, event_mask, data, data_type);
>
> - if (!fid_mode) {
> + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
> + if (data_type != FSNOTIFY_EVENT_MNT)
> + return 0;
> + } else if (!fid_mode) {
> /* Do we have path to open a file descriptor? */
> if (!path)
> return 0;
> @@ -548,6 +553,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
> return &pevent->fae;
> }
>
> +static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp)
> +{
> + struct fanotify_mnt_event *pevent;
> +
> + pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp);
> + if (!pevent)
> + return NULL;
> +
> + pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT;
> + pevent->mnt_id = mnt_id;
> +
> + return &pevent->fae;
> +}
> +
> static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
> gfp_t gfp)
> {
> @@ -715,6 +734,7 @@ static struct fanotify_event *fanotify_alloc_event(
> fid_mode);
> struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
> const struct path *path = fsnotify_data_path(data, data_type);
> + u64 mnt_id = fsnotify_data_mnt_id(data, data_type);
> struct mem_cgroup *old_memcg;
> struct dentry *moved = NULL;
> struct inode *child = NULL;
> @@ -810,8 +830,12 @@ static struct fanotify_event *fanotify_alloc_event(
> moved, &hash, gfp);
> } else if (fid_mode) {
> event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
> - } else {
> + } else if (path) {
> event = fanotify_alloc_path_event(path, &hash, gfp);
> + } else if (mnt_id) {
> + event = fanotify_alloc_mnt_event(mnt_id, gfp);
> + } else {
> + WARN_ON_ONCE(1);
> }
>
> if (!event)
> @@ -910,7 +934,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
> BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
> BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
>
> - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
> + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 23);
>
> mask = fanotify_group_event_mask(group, iter_info, &match_mask,
> mask, data, data_type, dir);
> @@ -1011,6 +1035,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group,
> mempool_free(fee, &group->fanotify_data.error_events_pool);
> }
>
> +static void fanotify_free_mnt_event(struct fanotify_event *event)
> +{
> + kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event));
> +}
> +
> static void fanotify_free_event(struct fsnotify_group *group,
> struct fsnotify_event *fsn_event)
> {
> @@ -1037,6 +1066,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
> case FANOTIFY_EVENT_TYPE_FS_ERROR:
> fanotify_free_error_event(group, event);
> break;
> + case FANOTIFY_EVENT_TYPE_MNT:
> + fanotify_free_mnt_event(event);
> + break;
> default:
> WARN_ON_ONCE(1);
> }
> diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> index e5ab33cae6a7..f1a7cbedc9e3 100644
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache;
> extern struct kmem_cache *fanotify_fid_event_cachep;
> extern struct kmem_cache *fanotify_path_event_cachep;
> extern struct kmem_cache *fanotify_perm_event_cachep;
> +extern struct kmem_cache *fanotify_mnt_event_cachep;
>
> /* Possible states of the permission event */
> enum {
> @@ -244,6 +245,7 @@ enum fanotify_event_type {
> FANOTIFY_EVENT_TYPE_PATH_PERM,
> FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
> FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
> + FANOTIFY_EVENT_TYPE_MNT,
> __FANOTIFY_EVENT_TYPE_NUM
> };
>
> @@ -409,12 +411,23 @@ struct fanotify_path_event {
> struct path path;
> };
>
> +struct fanotify_mnt_event {
> + struct fanotify_event fae;
> + u64 mnt_id;
> +};
> +
> static inline struct fanotify_path_event *
> FANOTIFY_PE(struct fanotify_event *event)
> {
> return container_of(event, struct fanotify_path_event, fae);
> }
>
> +static inline struct fanotify_mnt_event *
> +FANOTIFY_ME(struct fanotify_event *event)
> +{
> + return container_of(event, struct fanotify_mnt_event, fae);
> +}
> +
> /*
> * Structure for permission fanotify events. It gets allocated and freed in
> * fanotify_handle_event() since we wait there for user response. When the
> @@ -456,6 +469,11 @@ static inline bool fanotify_is_error_event(u32 mask)
> return mask & FAN_FS_ERROR;
> }
>
> +static inline bool fanotify_is_mnt_event(u32 mask)
> +{
> + return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
> +}
> +
> static inline const struct path *fanotify_event_path(struct fanotify_event *event)
> {
> if (event->type == FANOTIFY_EVENT_TYPE_PATH)
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index 2d85c71717d6..da97eb01e2fa 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -114,6 +114,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init;
> struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
> struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
> struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
> +struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init;
>
> #define FANOTIFY_EVENT_ALIGN 4
> #define FANOTIFY_FID_INFO_HDR_LEN \
> @@ -122,6 +123,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
> sizeof(struct fanotify_event_info_pidfd)
> #define FANOTIFY_ERROR_INFO_LEN \
> (sizeof(struct fanotify_event_info_error))
> +#define FANOTIFY_MNT_INFO_LEN \
> + (sizeof(struct fanotify_event_info_mnt))
>
> static int fanotify_fid_info_len(int fh_len, int name_len)
> {
> @@ -183,6 +186,8 @@ static size_t fanotify_event_len(unsigned int info_mode,
> fh_len = fanotify_event_object_fh_len(event);
> event_len += fanotify_fid_info_len(fh_len, dot_len);
> }
> + if (fanotify_is_mnt_event(event->mask))
> + event_len += FANOTIFY_MNT_INFO_LEN;
>
> return event_len;
> }
> @@ -380,6 +385,25 @@ static int process_access_response(struct fsnotify_group *group,
> return -ENOENT;
> }
>
> +static size_t copy_mnt_info_to_user(struct fanotify_event *event,
> + char __user *buf, int count)
> +{
> + struct fanotify_event_info_mnt info = { };
> +
> + info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT;
> + info.hdr.len = FANOTIFY_MNT_INFO_LEN;
> +
> + if (WARN_ON(count < info.hdr.len))
> + return -EFAULT;
> +
> + info.mnt_id = FANOTIFY_ME(event)->mnt_id;
> +
> + if (copy_to_user(buf, &info, sizeof(info)))
> + return -EFAULT;
> +
> + return info.hdr.len;
> +}
> +
> static size_t copy_error_info_to_user(struct fanotify_event *event,
> char __user *buf, int count)
> {
> @@ -642,6 +666,14 @@ static int copy_info_records_to_user(struct fanotify_event *event,
> total_bytes += ret;
> }
>
> + if (fanotify_is_mnt_event(event->mask)) {
> + ret = copy_mnt_info_to_user(event, buf, count);
> + if (ret < 0)
> + return ret;
> + buf += ret;
> + count -= ret;
> + total_bytes += ret;
> + }
> return total_bytes;
> }
>
> @@ -1446,6 +1478,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
> if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
> return -EINVAL;
>
> + /* Don't allow mixing mnt events with inode events for now */
> + if (flags & FAN_REPORT_MNT) {
> + if (class != FAN_CLASS_NOTIF)
> + return -EINVAL;
> + if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR))
> + return -EINVAL;
> + }
> +
> if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
> return -EINVAL;
>
> @@ -1685,7 +1725,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
> int dfd, const char __user *pathname)
> {
> struct inode *inode = NULL;
> - struct vfsmount *mnt = NULL;
> struct fsnotify_group *group;
> struct path path;
> struct fan_fsid __fsid, *fsid = NULL;
> @@ -1718,6 +1757,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
> case FAN_MARK_FILESYSTEM:
> obj_type = FSNOTIFY_OBJ_TYPE_SB;
> break;
> + case FAN_MARK_MNTNS:
> + obj_type = FSNOTIFY_OBJ_TYPE_MNTNS;
> + break;
> default:
> return -EINVAL;
> }
> @@ -1765,6 +1807,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
> return -EINVAL;
> group = fd_file(f)->private_data;
>
> + /* Only report mount events on mnt namespace */
> + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
> + if (mask & ~FANOTIFY_MOUNT_EVENTS)
> + return -EINVAL;
> + if (mark_type != FAN_MARK_MNTNS)
> + return -EINVAL;
> + } else {
> + if (mask & FANOTIFY_MOUNT_EVENTS)
> + return -EINVAL;
> + if (mark_type == FAN_MARK_MNTNS)
> + return -EINVAL;
> + }
> +
> /*
> * An unprivileged user is not allowed to setup mount nor filesystem
> * marks. This also includes setting up such marks by a group that
> @@ -1802,7 +1857,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
> * point.
> */
> fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
> - if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
> + if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) &&
> (!fid_mode || mark_type == FAN_MARK_MOUNT))
> return -EINVAL;
>
> @@ -1848,17 +1903,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
> }
>
> /* inode held in place by reference to path; group by fget on fd */
> - if (mark_type == FAN_MARK_INODE) {
> + if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
> inode = path.dentry->d_inode;
> obj = inode;
> - } else {
> - mnt = path.mnt;
> - if (mark_type == FAN_MARK_MOUNT)
> - obj = mnt;
> - else
> - obj = mnt->mnt_sb;
> + } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
> + obj = path.mnt;
> + } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
> + obj = path.mnt->mnt_sb;
> + } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
> + obj = mnt_ns_from_dentry(path.dentry);
> }
>
> + ret = -EINVAL;
> + if (!obj)
> + goto path_put_and_out;
> +
> /*
> * If some other task has this inode open for write we should not add
> * an ignore mask, unless that ignore mask is supposed to survive
> @@ -1866,10 +1925,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
> */
> if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
> !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
> - ret = mnt ? -EINVAL : -EISDIR;
> + ret = !inode ? -EINVAL : -EISDIR;
> /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
> if (ignore == FAN_MARK_IGNORE &&
> - (mnt || S_ISDIR(inode->i_mode)))
> + (!inode || S_ISDIR(inode->i_mode)))
> goto path_put_and_out;
>
> ret = 0;
> @@ -1878,7 +1937,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
> }
>
> /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
> - if (mnt || !S_ISDIR(inode->i_mode)) {
> + if (!inode || !S_ISDIR(inode->i_mode)) {
> mask &= ~FAN_EVENT_ON_CHILD;
> umask = FAN_EVENT_ON_CHILD;
> /*
> @@ -1952,7 +2011,7 @@ static int __init fanotify_user_setup(void)
> FANOTIFY_DEFAULT_MAX_USER_MARKS);
>
> BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
> - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13);
> + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14);
> BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
>
> fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
> @@ -1965,6 +2024,7 @@ static int __init fanotify_user_setup(void)
> fanotify_perm_event_cachep =
> KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
> }
> + fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC);
>
> fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
> init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
> diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
> index e933f9c65d90..1161eabf11ee 100644
> --- a/fs/notify/fdinfo.c
> +++ b/fs/notify/fdinfo.c
> @@ -121,6 +121,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
>
> seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
> sb->s_dev, mflags, mark->mask, mark->ignore_mask);
> + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
> + struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector);
> +
> + seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n",
> + mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask);
> }
> }
>
> diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
> index 89ff45bd6f01..fc142be2542d 100644
> --- a/include/linux/fanotify.h
> +++ b/include/linux/fanotify.h
> @@ -25,7 +25,7 @@
>
> #define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET)
>
> -#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
> +#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT)
>
> /*
> * fanotify_init() flags that require CAP_SYS_ADMIN.
> @@ -38,7 +38,8 @@
> FAN_REPORT_PIDFD | \
> FAN_REPORT_FD_ERROR | \
> FAN_UNLIMITED_QUEUE | \
> - FAN_UNLIMITED_MARKS)
> + FAN_UNLIMITED_MARKS | \
> + FAN_REPORT_MNT)
>
> /*
> * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
> @@ -58,7 +59,7 @@
> #define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV)
>
> #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
> - FAN_MARK_FILESYSTEM)
> + FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS)
>
> #define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
> FAN_MARK_FLUSH)
> @@ -99,10 +100,13 @@
> /* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
> #define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR)
>
> +#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH)
> +
> /* Events that user can request to be notified on */
> #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \
> FANOTIFY_INODE_EVENTS | \
> - FANOTIFY_ERROR_EVENTS)
> + FANOTIFY_ERROR_EVENTS | \
> + FANOTIFY_MOUNT_EVENTS)
>
> /* Events that require a permission response from user */
> #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \
> diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
> index 34f221d3a1b9..69340e483ae7 100644
> --- a/include/uapi/linux/fanotify.h
> +++ b/include/uapi/linux/fanotify.h
> @@ -25,6 +25,8 @@
> #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
> #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
> #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */
> +#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
> +#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
>
> #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
>
> @@ -61,6 +63,7 @@
> #define FAN_REPORT_NAME 0x00000800 /* Report events with name */
> #define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */
> #define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */
> +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
>
> /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
> #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
> @@ -91,6 +94,7 @@
> #define FAN_MARK_INODE 0x00000000
> #define FAN_MARK_MOUNT 0x00000010
> #define FAN_MARK_FILESYSTEM 0x00000100
> +#define FAN_MARK_MNTNS 0x00000110
>
> /*
> * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
> @@ -143,6 +147,7 @@ struct fanotify_event_metadata {
> #define FAN_EVENT_INFO_TYPE_DFID 3
> #define FAN_EVENT_INFO_TYPE_PIDFD 4
> #define FAN_EVENT_INFO_TYPE_ERROR 5
> +#define FAN_EVENT_INFO_TYPE_MNT 6
>
> /* Special info types for FAN_RENAME */
> #define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
> @@ -189,6 +194,11 @@ struct fanotify_event_info_error {
> __u32 error_count;
> };
>
> +struct fanotify_event_info_mnt {
> + struct fanotify_event_info_header hdr;
> + __u64 mnt_id;
> +};
> +
> /*
> * User space may need to record additional information about its decision.
> * The extra information type records what kind of information is included.
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 171dd7fceac5..d2b3e60e2be9 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -3395,6 +3395,10 @@ static int selinux_path_notify(const struct path *path, u64 mask,
> case FSNOTIFY_OBJ_TYPE_INODE:
> perm = FILE__WATCH;
> break;
> + case FSNOTIFY_OBJ_TYPE_MNTNS:
> + /* FIXME: Is this correct??? */
> + perm = FILE__WATCH;
> + break;
If I understand the commit description correctly,
security_path_notify(path, mask, FSNOTIFY_OBJ_TYPE_MNTNS) indicates a
change in the mount namespace indicated by the @path parameter, with
the initial mntns changes being limited to attach/detach and possibly
some other attributes (see patch 4/4), although the latter looks like
it will probably happen at a later date.
My initial thinking is that if we limit ourselves to existing SELinux
policy permissions, this is much more of FILE__WATCH_MOUNT operation
rather than a FILE__WATCH operation as while the /proc/PID/ns/mnt file
specified in @path is simply a file, it represents much more than
that. However, it we want to consider adding a new SELinux policy
permission (which is easy to do), we may want to consider adding a new
mount namespace specific permission, e.g. FILE__WATCH_MOUNTNS, this
would make it easier for policy developers to distinguish between
watching a traditional mount point and a mount namespace (although
given the common approaches to labeling this may not be very
significant). I'd personally like to hear from the SELinux policy
folks on this (the SELinux reference policy has also been CC'd).
If we reuse the file/watch_mount permission the policy rule would look
something like below where <subject> is the SELinux domain of the
process making the change, and <mntns_label> is the label of the
/proc/PID/ns/mnt file:
allow <subject> <mntns_label>:file { watch_mount };
If we add a new file/watch_mountns permission the policy rule would
look like this:
allow <subject> <mntns_label>:file { watch_mountns };
> default:
> return -EINVAL;
> }
> --
> 2.47.1
--
paul-moore.com
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v4 2/4] fanotify: notify on mount attach and detach
2025-01-24 19:38 ` Paul Moore
@ 2025-01-25 1:09 ` Russell Coker
2025-01-28 12:42 ` Miklos Szeredi
2025-01-28 18:11 ` Daniel Burgener
1 sibling, 1 reply; 14+ messages in thread
From: Russell Coker @ 2025-01-25 1:09 UTC (permalink / raw)
To: Miklos Szeredi, Paul Moore
Cc: linux-fsdevel, Christian Brauner, Jan Kara, Amir Goldstein,
Karel Zak, Lennart Poettering, Ian Kent, Al Viro,
linux-security-module, selinux, selinux-refpolicy
On Saturday, 25 January 2025 06:38:45 AEDT Paul Moore wrote:
> My initial thinking is that if we limit ourselves to existing SELinux
> policy permissions, this is much more of FILE__WATCH_MOUNT operation
> rather than a FILE__WATCH operation as while the /proc/PID/ns/mnt file
> specified in @path is simply a file, it represents much more than
> that. However, it we want to consider adding a new SELinux policy
> permission (which is easy to do), we may want to consider adding a new
> mount namespace specific permission, e.g. FILE__WATCH_MOUNTNS, this
> would make it easier for policy developers to distinguish between
> watching a traditional mount point and a mount namespace (although
> given the common approaches to labeling this may not be very
> significant). I'd personally like to hear from the SELinux policy
> folks on this (the SELinux reference policy has also been CC'd).
>
> If we reuse the file/watch_mount permission the policy rule would look
> something like below where <subject> is the SELinux domain of the
> process making the change, and <mntns_label> is the label of the
> /proc/PID/ns/mnt file:
>
> allow <subject> <mntns_label>:file { watch_mount };
>
> If we add a new file/watch_mountns permission the policy rule would
> look like this:
>
> allow <subject> <mntns_label>:file { watch_mountns };
What's the benefit in watching mount being separate from watching a namespace
mount?
In what situation could a process be permitted one of those but not the other?
--
My Main Blog http://etbe.coker.com.au/
My Documents Blog http://doc.coker.com.au/
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v4 2/4] fanotify: notify on mount attach and detach
2025-01-25 1:09 ` Russell Coker
@ 2025-01-28 12:42 ` Miklos Szeredi
2025-01-28 13:37 ` Miklos Szeredi
0 siblings, 1 reply; 14+ messages in thread
From: Miklos Szeredi @ 2025-01-28 12:42 UTC (permalink / raw)
To: russell
Cc: Miklos Szeredi, Paul Moore, linux-fsdevel, Christian Brauner,
Jan Kara, Amir Goldstein, Karel Zak, Lennart Poettering, Ian Kent,
Al Viro, linux-security-module, selinux, selinux-refpolicy
On Sat, 25 Jan 2025 at 02:17, Russell Coker <russell@coker.com.au> wrote:
> What's the benefit in watching mount being separate from watching a namespace
> mount?
1)
fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_MOUNT, FAN_OPEN,
AT_FDCWD, "/proc/self/ns/mnt");
This notifies on mount and unmount events in the current mount namespace.
2)
fanotify_mark(fan, FAN_MARK_ADD | FAN_MARK_MOUNT, FAN_OPEN, AT_FDCWD,
"/proc/self/ns/mnt");
This notifies on open events within the nsfs mount (proc uses a kernel
private nsfs mount, so all accesses through proc will trigger this).
The latter doesn't really make sense (these files are not openable),
but it's doable with current kernels and events on the failed opens do
get generated.
So overloading FILE__WATCH_MOUNT might work, but it is also very
confusing, since watching a mount namespace and watching a mount mean
completely different things.
Thanks,
Miklos
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4 2/4] fanotify: notify on mount attach and detach
2025-01-28 12:42 ` Miklos Szeredi
@ 2025-01-28 13:37 ` Miklos Szeredi
0 siblings, 0 replies; 14+ messages in thread
From: Miklos Szeredi @ 2025-01-28 13:37 UTC (permalink / raw)
To: russell
Cc: Miklos Szeredi, Paul Moore, linux-fsdevel, Christian Brauner,
Jan Kara, Amir Goldstein, Karel Zak, Lennart Poettering, Ian Kent,
Al Viro, linux-security-module, selinux, selinux-refpolicy
On Tue, 28 Jan 2025 at 13:42, Miklos Szeredi <miklos@szeredi.hu> wrote:
> fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_MOUNT, FAN_OPEN,
> AT_FDCWD, "/proc/self/ns/mnt");
Sorry, this should have been:
1)
fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS, FAN_MNT_ATTACH |
FAN_MNT_DETACH, AT_FDCWD, "/proc/self/ns/mnt");
This notifies on mount and unmount events in the current mount namespace.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4 2/4] fanotify: notify on mount attach and detach
2025-01-24 19:38 ` Paul Moore
2025-01-25 1:09 ` Russell Coker
@ 2025-01-28 18:11 ` Daniel Burgener
1 sibling, 0 replies; 14+ messages in thread
From: Daniel Burgener @ 2025-01-28 18:11 UTC (permalink / raw)
To: Paul Moore, Miklos Szeredi
Cc: linux-fsdevel, Christian Brauner, Jan Kara, Amir Goldstein,
Karel Zak, Lennart Poettering, Ian Kent, Al Viro,
linux-security-module, selinux, selinux-refpolicy
> If I understand the commit description correctly,
> security_path_notify(path, mask, FSNOTIFY_OBJ_TYPE_MNTNS) indicates a
> change in the mount namespace indicated by the @path parameter, with
> the initial mntns changes being limited to attach/detach and possibly
> some other attributes (see patch 4/4), although the latter looks like
> it will probably happen at a later date.
>
> My initial thinking is that if we limit ourselves to existing SELinux
> policy permissions, this is much more of FILE__WATCH_MOUNT operation
> rather than a FILE__WATCH operation as while the /proc/PID/ns/mnt file
> specified in @path is simply a file, it represents much more than
> that. However, it we want to consider adding a new SELinux policy
> permission (which is easy to do), we may want to consider adding a new
> mount namespace specific permission, e.g. FILE__WATCH_MOUNTNS, this
> would make it easier for policy developers to distinguish between
> watching a traditional mount point and a mount namespace (although
> given the common approaches to labeling this may not be very
> significant). I'd personally like to hear from the SELinux policy
> folks on this (the SELinux reference policy has also been CC'd).
>
> If we reuse the file/watch_mount permission the policy rule would look
> something like below where <subject> is the SELinux domain of the
> process making the change, and <mntns_label> is the label of the
> /proc/PID/ns/mnt file:
>
> allow <subject> <mntns_label>:file { watch_mount };
>
> If we add a new file/watch_mountns permission the policy rule would
> look like this:
>
> allow <subject> <mntns_label>:file { watch_mountns };
>
I've gone back and forth on this a few times. If I understand it
correctly, I think we might really want to have a new permission here,
which is sad, because in my humble opinion, the watch_* permissions are
already more complicated than I like.
"watch" does seem to be the wrong thing because this grants more than
just changes to the specific file. However, "watch_mount" is a very
highly privileged operation. Allowing watch on all reads and writes in
the whole file hierarchy from a mount point is a substantial amount of
access, and seems quite a bit more substantial than just watching new
mounts being attached and detached (and similar) within a given mount
namespace.
FWIW I do think the assumption that different labeling between /proc/pid
files and mountpoints generally does make this not a problem in
practice. But in my opinion overloading watch_mount for this case seems
different from the existing watch_mount permission to warrant not doing
it. Particularly with watch_mount being such a privileged operation.
-Daniel
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v4 3/4] vfs: add notifications for mount attach and detach
2025-01-23 19:41 [PATCH v4 0/4] mount notification Miklos Szeredi
2025-01-23 19:41 ` [PATCH v4 1/4] fsnotify: add mount notification infrastructure Miklos Szeredi
2025-01-23 19:41 ` [PATCH v4 2/4] fanotify: notify on mount attach and detach Miklos Szeredi
@ 2025-01-23 19:41 ` Miklos Szeredi
2025-01-23 19:41 ` [PATCH v4 4/4] vfs: add notifications for mount attribute change Miklos Szeredi
3 siblings, 0 replies; 14+ messages in thread
From: Miklos Szeredi @ 2025-01-23 19:41 UTC (permalink / raw)
To: linux-fsdevel
Cc: Christian Brauner, Jan Kara, Amir Goldstein, Karel Zak,
Lennart Poettering, Ian Kent, Al Viro, linux-security-module,
Paul Moore
Add notifications for attaching and detaching mounts to fs/namespace.c
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
fs/mount.h | 20 +++++++++++++
fs/namespace.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++-
fs/pnode.c | 4 ++-
3 files changed, 101 insertions(+), 2 deletions(-)
diff --git a/fs/mount.h b/fs/mount.h
index 9689e7bf4501..7dd22a226a6e 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -5,6 +5,8 @@
#include <linux/ns_common.h>
#include <linux/fs_pin.h>
+extern struct list_head notify_list;
+
struct mnt_namespace {
struct ns_common ns;
struct mount * root;
@@ -72,6 +74,8 @@ struct mount {
#ifdef CONFIG_FSNOTIFY
struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
+ struct list_head to_notify; /* need to queue notification */
+ struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */
#endif
int mnt_id; /* mount identifier, reused */
u64 mnt_id_unique; /* mount ID unique until reboot */
@@ -175,4 +179,20 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
return container_of(ns, struct mnt_namespace, ns);
}
+#ifdef CONFIG_FSNOTIFY
+static inline void mnt_notify_add(struct mount *m)
+{
+ /* Optimize the case where there are no watches */
+ if ((m->mnt_ns && m->mnt_ns->n_fsnotify_marks) ||
+ (m->prev_ns && m->prev_ns->n_fsnotify_marks))
+ list_add_tail(&m->to_notify, ¬ify_list);
+ else
+ m->prev_ns = m->mnt_ns;
+}
+#else
+static inline void mnt_notify_add(struct mount *m)
+{
+}
+#endif
+
struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
diff --git a/fs/namespace.c b/fs/namespace.c
index 4d9072fd1263..948348a37f6c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -79,6 +79,9 @@ static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
+#ifdef CONFIG_FSNOTIFY
+LIST_HEAD(notify_list); /* protected by namespace_sem */
+#endif
static DEFINE_RWLOCK(mnt_ns_tree_lock);
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
@@ -145,6 +148,7 @@ static void mnt_ns_release(struct mnt_namespace *ns)
/* keep alive for {list,stat}mount() */
if (refcount_dec_and_test(&ns->passive)) {
+ fsnotify_mntns_delete(ns);
put_user_ns(ns->user_ns);
kfree(ns);
}
@@ -1136,6 +1140,8 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
}
rb_link_node(&mnt->mnt_node, parent, link);
rb_insert_color(&mnt->mnt_node, &ns->mounts);
+
+ mnt_notify_add(mnt);
}
/*
@@ -1683,6 +1689,50 @@ int may_umount(struct vfsmount *mnt)
EXPORT_SYMBOL(may_umount);
+#ifdef CONFIG_FSNOTIFY
+static void mnt_notify(struct mount *p)
+{
+ if (!p->prev_ns && p->mnt_ns) {
+ fsnotify_mnt_attach(p->mnt_ns, &p->mnt);
+ } else if (p->prev_ns && !p->mnt_ns) {
+ fsnotify_mnt_detach(p->prev_ns, &p->mnt);
+ } else if (p->prev_ns == p->mnt_ns) {
+ fsnotify_mnt_move(p->mnt_ns, &p->mnt);
+ } else {
+ fsnotify_mnt_detach(p->prev_ns, &p->mnt);
+ fsnotify_mnt_attach(p->mnt_ns, &p->mnt);
+ }
+ p->prev_ns = p->mnt_ns;
+}
+
+static void notify_mnt_list(void)
+{
+ struct mount *m, *tmp;
+ /*
+ * Notify about mounts that were added/reparented/detached/remain
+ * connected after unmount.
+ */
+ list_for_each_entry_safe(m, tmp, ¬ify_list, to_notify) {
+ mnt_notify(m);
+ list_del_init(&m->to_notify);
+ }
+}
+
+static bool need_notify_mnt_list(void)
+{
+ return !list_empty(¬ify_list);
+}
+#else
+static void notify_mnt_list(void)
+{
+}
+
+static bool need_notify_mnt_list(void)
+{
+ return false;
+}
+#endif
+
static void namespace_unlock(void)
{
struct hlist_head head;
@@ -1693,7 +1743,18 @@ static void namespace_unlock(void)
hlist_move_list(&unmounted, &head);
list_splice_init(&ex_mountpoints, &list);
- up_write(&namespace_sem);
+ if (need_notify_mnt_list()) {
+ /*
+ * No point blocking out concurrent readers while notifications
+ * are sent. This will also allow statmount()/listmount() to run
+ * concurrently.
+ */
+ downgrade_write(&namespace_sem);
+ notify_mnt_list();
+ up_read(&namespace_sem);
+ } else {
+ up_write(&namespace_sem);
+ }
shrink_dentry_list(&list);
@@ -1806,6 +1867,19 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
change_mnt_propagation(p, MS_PRIVATE);
if (disconnect)
hlist_add_head(&p->mnt_umount, &unmounted);
+
+ /*
+ * At this point p->mnt_ns is NULL, notification will be queued
+ * only if
+ *
+ * - p->prev_ns is non-NULL *and*
+ * - p->prev_ns->n_fsnotify_marks is non-NULL
+ *
+ * This will preclude queuing the mount if this is a cleanup
+ * after a failed copy_tree() or destruction of an anonymous
+ * namespace, etc.
+ */
+ mnt_notify_add(p);
}
}
@@ -2511,6 +2585,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
dest_mp = smp;
unhash_mnt(source_mnt);
attach_mnt(source_mnt, top_mnt, dest_mp, beneath);
+ mnt_notify_add(source_mnt);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
if (source_mnt->mnt_ns) {
@@ -4426,6 +4501,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
list_del_init(&new_mnt->mnt_expire);
put_mountpoint(root_mp);
unlock_mount_hash();
+ mnt_notify_add(root_mnt);
+ mnt_notify_add(new_mnt);
chroot_fs_refs(&root, &new);
error = 0;
out4:
diff --git a/fs/pnode.c b/fs/pnode.c
index a799e0315cc9..d42b71c3567a 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -549,8 +549,10 @@ static void restore_mounts(struct list_head *to_restore)
mp = parent->mnt_mp;
parent = parent->mnt_parent;
}
- if (parent != mnt->mnt_parent)
+ if (parent != mnt->mnt_parent) {
mnt_change_mountpoint(parent, mp, mnt);
+ mnt_notify_add(mnt);
+ }
}
}
--
2.47.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v4 4/4] vfs: add notifications for mount attribute change
2025-01-23 19:41 [PATCH v4 0/4] mount notification Miklos Szeredi
` (2 preceding siblings ...)
2025-01-23 19:41 ` [PATCH v4 3/4] vfs: add notifications for " Miklos Szeredi
@ 2025-01-23 19:41 ` Miklos Szeredi
2025-01-24 9:09 ` Amir Goldstein
2025-01-24 15:38 ` Christian Brauner
3 siblings, 2 replies; 14+ messages in thread
From: Miklos Szeredi @ 2025-01-23 19:41 UTC (permalink / raw)
To: linux-fsdevel
Cc: Christian Brauner, Jan Kara, Amir Goldstein, Karel Zak,
Lennart Poettering, Ian Kent, Al Viro, linux-security-module,
Paul Moore
Notify when mount flags, propagation or idmap changes.
Just like attach and detach, no details are given in the notification, only
the mount ID.
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
fs/namespace.c | 27 +++++++++++++++++++++++++++
fs/notify/fanotify/fanotify.c | 2 +-
fs/notify/fanotify/fanotify.h | 2 +-
fs/notify/fsnotify.c | 2 +-
include/linux/fanotify.h | 2 +-
include/linux/fsnotify.h | 5 +++++
include/linux/fsnotify_backend.h | 5 ++++-
include/uapi/linux/fanotify.h | 1 +
8 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 948348a37f6c..9b9b13665dce 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2807,6 +2807,9 @@ static int do_change_type(struct path *path, int ms_flags)
change_mnt_propagation(m, type);
unlock_mount_hash();
+ for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
+ fsnotify_mnt_change(m->mnt_ns, &m->mnt);
+
out_unlock:
namespace_unlock();
return err;
@@ -3089,6 +3092,12 @@ static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
unlock_mount_hash();
up_read(&sb->s_umount);
+ if (!ret) {
+ down_read(&namespace_sem);
+ fsnotify_mnt_change(mnt->mnt_ns, &mnt->mnt);
+ up_read(&namespace_sem);
+ }
+
mnt_warn_timestamp_expiry(path, &mnt->mnt);
return ret;
@@ -3141,6 +3150,13 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
up_write(&sb->s_umount);
}
+ if (!err) {
+ down_read(&namespace_sem);
+ fsnotify_mnt_change(mnt->mnt_ns, &mnt->mnt);
+ up_read(&namespace_sem);
+ }
+
+
mnt_warn_timestamp_expiry(path, &mnt->mnt);
put_fs_context(fc);
@@ -4708,6 +4724,8 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
return err;
}
}
+ } else {
+ down_read(&namespace_sem);
}
err = -EINVAL;
@@ -4743,10 +4761,19 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
out:
unlock_mount_hash();
+ if (!err) {
+ struct mount *m;
+
+ for (m = mnt; m; m = kattr->recurse ? next_mnt(m, mnt) : NULL)
+ fsnotify_mnt_change(m->mnt_ns, &m->mnt);
+ }
+
if (kattr->propagation) {
if (err)
cleanup_group_ids(mnt, NULL);
namespace_unlock();
+ } else {
+ up_read(&namespace_sem);
}
return err;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index b1937f92f105..c7ddd145f3d8 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -934,7 +934,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
- BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 23);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24);
mask = fanotify_group_event_mask(group, iter_info, &match_mask,
mask, data, data_type, dir);
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index f1a7cbedc9e3..8d6289da06f1 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -471,7 +471,7 @@ static inline bool fanotify_is_error_event(u32 mask)
static inline bool fanotify_is_mnt_event(u32 mask)
{
- return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ return mask & FANOTIFY_MOUNT_EVENTS;
}
static inline const struct path *fanotify_event_path(struct fanotify_event *event)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 2b2c3fd907c7..5872dd27172d 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -660,7 +660,7 @@ static __init int fsnotify_init(void)
{
int ret;
- BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index fc142be2542d..61e112d25303 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -100,7 +100,7 @@
/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
#define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR)
-#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH)
+#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH | FAN_MNT_CHANGE)
/* Events that user can request to be notified on */
#define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index ea998551dd0d..ba3e05c69aaa 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -483,4 +483,9 @@ static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *
fsnotify_mnt(FS_MNT_MOVE, ns, mnt);
}
+static inline void fsnotify_mnt_change(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ fsnotify_mnt(FS_MNT_CHANGE, ns, mnt);
+}
+
#endif /* _LINUX_FS_NOTIFY_H */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 6c3e3a4a7b10..54e01803e309 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -58,6 +58,8 @@
#define FS_MNT_ATTACH 0x01000000 /* Mount was attached */
#define FS_MNT_DETACH 0x02000000 /* Mount was detached */
+#define FS_MNT_CHANGE 0x04000000 /* Mount was changed */
+
#define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH)
/*
@@ -106,7 +108,8 @@
FS_EVENTS_POSS_ON_CHILD | \
FS_DELETE_SELF | FS_MOVE_SELF | \
FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
- FS_ERROR | FS_MNT_ATTACH | FS_MNT_DETACH)
+ FS_ERROR | \
+ FS_MNT_ATTACH | FS_MNT_DETACH | FS_MNT_CHANGE )
/* Extra flags that may be reported with event or control handling of events */
#define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT)
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 69340e483ae7..256fc5755b45 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -27,6 +27,7 @@
#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */
#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
+#define FAN_MNT_CHANGE 0x04000000 /* Mount was changed */
#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
--
2.47.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH v4 4/4] vfs: add notifications for mount attribute change
2025-01-23 19:41 ` [PATCH v4 4/4] vfs: add notifications for mount attribute change Miklos Szeredi
@ 2025-01-24 9:09 ` Amir Goldstein
2025-01-24 15:38 ` Christian Brauner
1 sibling, 0 replies; 14+ messages in thread
From: Amir Goldstein @ 2025-01-24 9:09 UTC (permalink / raw)
To: Miklos Szeredi
Cc: linux-fsdevel, Christian Brauner, Jan Kara, Karel Zak,
Lennart Poettering, Ian Kent, Al Viro, linux-security-module,
Paul Moore
On Thu, Jan 23, 2025 at 8:41 PM Miklos Szeredi <mszeredi@redhat.com> wrote:
>
> Notify when mount flags, propagation or idmap changes.
>
> Just like attach and detach, no details are given in the notification, only
> the mount ID.
>
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
My only nit this time is that I prefer the fsnotify/fanotify bits here
to be in patches 1,2
which as you write, only add the infrastructure to be used later.
[...]
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -471,7 +471,7 @@ static inline bool fanotify_is_error_event(u32 mask)
>
> static inline bool fanotify_is_mnt_event(u32 mask)
> {
> - return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
> + return mask & FANOTIFY_MOUNT_EVENTS;
> }
>
This should have used the macro from the first use in patch 2.
[...]
> diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
> index 6c3e3a4a7b10..54e01803e309 100644
> --- a/include/linux/fsnotify_backend.h
> +++ b/include/linux/fsnotify_backend.h
> @@ -58,6 +58,8 @@
>
> #define FS_MNT_ATTACH 0x01000000 /* Mount was attached */
> #define FS_MNT_DETACH 0x02000000 /* Mount was detached */
> +#define FS_MNT_CHANGE 0x04000000 /* Mount was changed */
> +
> #define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH)
>
> /*
> @@ -106,7 +108,8 @@
> FS_EVENTS_POSS_ON_CHILD | \
> FS_DELETE_SELF | FS_MOVE_SELF | \
> FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
> - FS_ERROR | FS_MNT_ATTACH | FS_MNT_DETACH)
> + FS_ERROR | \
> + FS_MNT_ATTACH | FS_MNT_DETACH | FS_MNT_CHANGE )
Please add those bits as a group in patch 1:
@@ -80,6 +80,9 @@
*/
#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE |
FS_RENAME)
+/* Mount namespace events */
+#define FSNOTIFY_MNT_EVENTS (FS_MNT_ATTACH | FS_MNT_DETACH | FS_MNT_CHANGE)
+
/* Content events can be used to inspect file content */
#define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \
FS_ACCESS_PERM)
@@ -108,6 +111,7 @@
/* Events that can be reported to backends */
#define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
+ FSNOTIFY_MNT_EVENTS | \
FS_EVENTS_POSS_ON_CHILD | \
I am aware of the inconsistency of the names ALL_FSNOTIFY_* and FSNOTIFY_*
but if you look at master as of last night you will find:
FSNOTIFY_CONTENT_PERM_EVENTS and FSNOTIFY_PRE_CONTENT_EVENTS
(please rebase)
One day we may cleanup ALL_FSNOTIFY_DIRENT_EVENTS and
ALL_FSNOTIFY_PERM_EVENTS to conform.
Thanks,
Amir.
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v4 4/4] vfs: add notifications for mount attribute change
2025-01-23 19:41 ` [PATCH v4 4/4] vfs: add notifications for mount attribute change Miklos Szeredi
2025-01-24 9:09 ` Amir Goldstein
@ 2025-01-24 15:38 ` Christian Brauner
2025-01-24 15:49 ` Miklos Szeredi
1 sibling, 1 reply; 14+ messages in thread
From: Christian Brauner @ 2025-01-24 15:38 UTC (permalink / raw)
To: Miklos Szeredi
Cc: linux-fsdevel, Jan Kara, Amir Goldstein, Karel Zak,
Lennart Poettering, Ian Kent, Al Viro, linux-security-module,
Paul Moore
On Thu, Jan 23, 2025 at 08:41:07PM +0100, Miklos Szeredi wrote:
> Notify when mount flags, propagation or idmap changes.
>
> Just like attach and detach, no details are given in the notification, only
> the mount ID.
>
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
I think this is a good next step but I would first go with the minimal
functionality of notifying about mount topology changes for v6.15.
Btw, if we notify in do_remount() on the mount that triggered
superblock reconfiguration then we also need to trigger in
vfs_cmd_reconfigure() aka fsconfig(FSCONFIG_CMD_RECONFIGURE) but the
mount that was used to change superblock options is only available in
fspick() currently. That would need to be handled.
But I think this patch makes more sense in follow-up releases.
> fs/namespace.c | 27 +++++++++++++++++++++++++++
> fs/notify/fanotify/fanotify.c | 2 +-
> fs/notify/fanotify/fanotify.h | 2 +-
> fs/notify/fsnotify.c | 2 +-
> include/linux/fanotify.h | 2 +-
> include/linux/fsnotify.h | 5 +++++
> include/linux/fsnotify_backend.h | 5 ++++-
> include/uapi/linux/fanotify.h | 1 +
> 8 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 948348a37f6c..9b9b13665dce 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -2807,6 +2807,9 @@ static int do_change_type(struct path *path, int ms_flags)
> change_mnt_propagation(m, type);
> unlock_mount_hash();
>
> + for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
> + fsnotify_mnt_change(m->mnt_ns, &m->mnt);
> +
> out_unlock:
> namespace_unlock();
> return err;
> @@ -3089,6 +3092,12 @@ static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
> unlock_mount_hash();
> up_read(&sb->s_umount);
>
> + if (!ret) {
> + down_read(&namespace_sem);
> + fsnotify_mnt_change(mnt->mnt_ns, &mnt->mnt);
> + up_read(&namespace_sem);
> + }
> +
> mnt_warn_timestamp_expiry(path, &mnt->mnt);
>
> return ret;
> @@ -3141,6 +3150,13 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
> up_write(&sb->s_umount);
> }
>
> + if (!err) {
> + down_read(&namespace_sem);
> + fsnotify_mnt_change(mnt->mnt_ns, &mnt->mnt);
> + up_read(&namespace_sem);
> + }
> +
> +
> mnt_warn_timestamp_expiry(path, &mnt->mnt);
>
> put_fs_context(fc);
> @@ -4708,6 +4724,8 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
> return err;
> }
> }
> + } else {
> + down_read(&namespace_sem);
> }
>
> err = -EINVAL;
> @@ -4743,10 +4761,19 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
> out:
> unlock_mount_hash();
>
> + if (!err) {
> + struct mount *m;
> +
> + for (m = mnt; m; m = kattr->recurse ? next_mnt(m, mnt) : NULL)
> + fsnotify_mnt_change(m->mnt_ns, &m->mnt);
> + }
> +
> if (kattr->propagation) {
> if (err)
> cleanup_group_ids(mnt, NULL);
> namespace_unlock();
> + } else {
> + up_read(&namespace_sem);
> }
>
> return err;
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index b1937f92f105..c7ddd145f3d8 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -934,7 +934,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
> BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
> BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
>
> - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 23);
> + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24);
>
> mask = fanotify_group_event_mask(group, iter_info, &match_mask,
> mask, data, data_type, dir);
> diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> index f1a7cbedc9e3..8d6289da06f1 100644
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -471,7 +471,7 @@ static inline bool fanotify_is_error_event(u32 mask)
>
> static inline bool fanotify_is_mnt_event(u32 mask)
> {
> - return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
> + return mask & FANOTIFY_MOUNT_EVENTS;
> }
>
> static inline const struct path *fanotify_event_path(struct fanotify_event *event)
> diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> index 2b2c3fd907c7..5872dd27172d 100644
> --- a/fs/notify/fsnotify.c
> +++ b/fs/notify/fsnotify.c
> @@ -660,7 +660,7 @@ static __init int fsnotify_init(void)
> {
> int ret;
>
> - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
> + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26);
>
> ret = init_srcu_struct(&fsnotify_mark_srcu);
> if (ret)
> diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
> index fc142be2542d..61e112d25303 100644
> --- a/include/linux/fanotify.h
> +++ b/include/linux/fanotify.h
> @@ -100,7 +100,7 @@
> /* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
> #define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR)
>
> -#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH)
> +#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH | FAN_MNT_CHANGE)
>
> /* Events that user can request to be notified on */
> #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \
> diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
> index ea998551dd0d..ba3e05c69aaa 100644
> --- a/include/linux/fsnotify.h
> +++ b/include/linux/fsnotify.h
> @@ -483,4 +483,9 @@ static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *
> fsnotify_mnt(FS_MNT_MOVE, ns, mnt);
> }
>
> +static inline void fsnotify_mnt_change(struct mnt_namespace *ns, struct vfsmount *mnt)
> +{
> + fsnotify_mnt(FS_MNT_CHANGE, ns, mnt);
> +}
> +
> #endif /* _LINUX_FS_NOTIFY_H */
> diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
> index 6c3e3a4a7b10..54e01803e309 100644
> --- a/include/linux/fsnotify_backend.h
> +++ b/include/linux/fsnotify_backend.h
> @@ -58,6 +58,8 @@
>
> #define FS_MNT_ATTACH 0x01000000 /* Mount was attached */
> #define FS_MNT_DETACH 0x02000000 /* Mount was detached */
> +#define FS_MNT_CHANGE 0x04000000 /* Mount was changed */
> +
> #define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH)
>
> /*
> @@ -106,7 +108,8 @@
> FS_EVENTS_POSS_ON_CHILD | \
> FS_DELETE_SELF | FS_MOVE_SELF | \
> FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
> - FS_ERROR | FS_MNT_ATTACH | FS_MNT_DETACH)
> + FS_ERROR | \
> + FS_MNT_ATTACH | FS_MNT_DETACH | FS_MNT_CHANGE )
>
> /* Extra flags that may be reported with event or control handling of events */
> #define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT)
> diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
> index 69340e483ae7..256fc5755b45 100644
> --- a/include/uapi/linux/fanotify.h
> +++ b/include/uapi/linux/fanotify.h
> @@ -27,6 +27,7 @@
> #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */
> #define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
> #define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
> +#define FAN_MNT_CHANGE 0x04000000 /* Mount was changed */
>
> #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
>
> --
> 2.47.1
>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v4 4/4] vfs: add notifications for mount attribute change
2025-01-24 15:38 ` Christian Brauner
@ 2025-01-24 15:49 ` Miklos Szeredi
2025-01-25 9:22 ` Christian Brauner
0 siblings, 1 reply; 14+ messages in thread
From: Miklos Szeredi @ 2025-01-24 15:49 UTC (permalink / raw)
To: Christian Brauner
Cc: Miklos Szeredi, linux-fsdevel, Jan Kara, Amir Goldstein,
Karel Zak, Lennart Poettering, Ian Kent, Al Viro,
linux-security-module, Paul Moore
On Fri, 24 Jan 2025 at 16:38, Christian Brauner <brauner@kernel.org> wrote:
>
> On Thu, Jan 23, 2025 at 08:41:07PM +0100, Miklos Szeredi wrote:
> > Notify when mount flags, propagation or idmap changes.
> >
> > Just like attach and detach, no details are given in the notification, only
> > the mount ID.
> >
> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
>
> I think this is a good next step but I would first go with the minimal
> functionality of notifying about mount topology changes for v6.15.
I can totally relate to that. I added the fourth patch more as a
"let's see if this can also fit into the current framework".
> Btw, if we notify in do_remount() on the mount that triggered
> superblock reconfiguration then we also need to trigger in
> vfs_cmd_reconfigure() aka fsconfig(FSCONFIG_CMD_RECONFIGURE) but the
> mount that was used to change superblock options is only available in
> fspick() currently. That would need to be handled.
No, if we'd want to watch changes on super blocks, then we'd need to
iterate all the mounts of the superblock and notify each.
I'm not sure if it's something we really want, watching the super
block itself for changes sounds saner.
Thanks,
Miklos
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4 4/4] vfs: add notifications for mount attribute change
2025-01-24 15:49 ` Miklos Szeredi
@ 2025-01-25 9:22 ` Christian Brauner
0 siblings, 0 replies; 14+ messages in thread
From: Christian Brauner @ 2025-01-25 9:22 UTC (permalink / raw)
To: Miklos Szeredi
Cc: Miklos Szeredi, linux-fsdevel, Jan Kara, Amir Goldstein,
Karel Zak, Lennart Poettering, Ian Kent, Al Viro,
linux-security-module, Paul Moore
On Fri, Jan 24, 2025 at 04:49:28PM +0100, Miklos Szeredi wrote:
> On Fri, 24 Jan 2025 at 16:38, Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Thu, Jan 23, 2025 at 08:41:07PM +0100, Miklos Szeredi wrote:
> > > Notify when mount flags, propagation or idmap changes.
> > >
> > > Just like attach and detach, no details are given in the notification, only
> > > the mount ID.
> > >
> > > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > > ---
> >
> > I think this is a good next step but I would first go with the minimal
> > functionality of notifying about mount topology changes for v6.15.
>
> I can totally relate to that. I added the fourth patch more as a
> "let's see if this can also fit into the current framework".
>
> > Btw, if we notify in do_remount() on the mount that triggered
> > superblock reconfiguration then we also need to trigger in
> > vfs_cmd_reconfigure() aka fsconfig(FSCONFIG_CMD_RECONFIGURE) but the
> > mount that was used to change superblock options is only available in
> > fspick() currently. That would need to be handled.
>
> No, if we'd want to watch changes on super blocks, then we'd need to
> iterate all the mounts of the superblock and notify each.
Ah, I remember that old remount had unclear semantics where mount
specific and superblock specific options are interleaved. So we would
need to notify from do_remount() on mount specific changes. Right, then
this change is correct and I agree about the superblock part.
^ permalink raw reply [flat|nested] 14+ messages in thread