* [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
@ 2024-05-20 21:35 Aleksa Sarai
2024-05-20 21:53 ` Jeff Layton
` (5 more replies)
0 siblings, 6 replies; 21+ messages in thread
From: Aleksa Sarai @ 2024-05-20 21:35 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
Jeff Layton, Amir Goldstein, Alexander Aring
Cc: linux-fsdevel, linux-nfs, linux-kernel, Aleksa Sarai
Now that we have stabilised the unique 64-bit mount ID interface in
statx, we can now provide a race-free way for name_to_handle_at(2) to
provide a file handle and corresponding mount without needing to worry
about racing with /proc/mountinfo parsing.
As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
that doesn't make sense for name_to_handle_at(2).
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
---
fs/fhandle.c | 27 +++++++++++++++++++--------
include/uapi/linux/fcntl.h | 2 ++
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 8a7f86c2139a..6bc7ffccff8c 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -16,7 +16,8 @@
static long do_sys_name_to_handle(const struct path *path,
struct file_handle __user *ufh,
- int __user *mnt_id, int fh_flags)
+ void __user *mnt_id, bool unique_mntid,
+ int fh_flags)
{
long retval;
struct file_handle f_handle;
@@ -69,10 +70,16 @@ static long do_sys_name_to_handle(const struct path *path,
} else
retval = 0;
/* copy the mount id */
- if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
- copy_to_user(ufh, handle,
- struct_size(handle, f_handle, handle_bytes)))
- retval = -EFAULT;
+ if (unique_mntid)
+ retval = put_user(real_mount(path->mnt)->mnt_id_unique,
+ (u64 __user *) mnt_id);
+ else
+ retval = put_user(real_mount(path->mnt)->mnt_id,
+ (int __user *) mnt_id);
+ /* copy the handle */
+ if (!retval)
+ retval = copy_to_user(ufh, handle,
+ struct_size(handle, f_handle, handle_bytes));
kfree(handle);
return retval;
}
@@ -83,6 +90,7 @@ static long do_sys_name_to_handle(const struct path *path,
* @name: name that should be converted to handle.
* @handle: resulting file handle
* @mnt_id: mount id of the file system containing the file
+ * (u64 if AT_HANDLE_UNIQUE_MNT_ID, otherwise int)
* @flag: flag value to indicate whether to follow symlink or not
* and whether a decodable file handle is required.
*
@@ -92,7 +100,7 @@ static long do_sys_name_to_handle(const struct path *path,
* value required.
*/
SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
- struct file_handle __user *, handle, int __user *, mnt_id,
+ struct file_handle __user *, handle, void __user *, mnt_id,
int, flag)
{
struct path path;
@@ -100,7 +108,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
int fh_flags;
int err;
- if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID))
+ if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
+ AT_HANDLE_UNIQUE_MNT_ID))
return -EINVAL;
lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
@@ -109,7 +118,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
lookup_flags |= LOOKUP_EMPTY;
err = user_path_at(dfd, name, lookup_flags, &path);
if (!err) {
- err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags);
+ err = do_sys_name_to_handle(&path, handle, mnt_id,
+ flag & AT_HANDLE_UNIQUE_MNT_ID,
+ fh_flags);
path_put(&path);
}
return err;
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index c0bcc185fa48..fda970f92fba 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -118,6 +118,8 @@
#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
compare object identity and may not
be usable to open_by_handle_at(2) */
+#define AT_HANDLE_UNIQUE_MNT_ID AT_STATX_FORCE_SYNC /* returned mount id is
+ the u64 unique mount id */
#if defined(__KERNEL__)
#define AT_GETATTR_NOSEC 0x80000000
#endif
---
base-commit: 584bbf439d0fa83d728ec49f3a38c581bdc828b4
change-id: 20240515-exportfs-u64-mount-id-9ebb5c58b53c
Best regards,
--
Aleksa Sarai <cyphar@cyphar.com>
^ permalink raw reply related [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-20 21:35 [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2) Aleksa Sarai
@ 2024-05-20 21:53 ` Jeff Layton
2024-05-20 22:27 ` Aleksa Sarai
2024-05-21 2:18 ` kernel test robot
` (4 subsequent siblings)
5 siblings, 1 reply; 21+ messages in thread
From: Jeff Layton @ 2024-05-20 21:53 UTC (permalink / raw)
To: Aleksa Sarai, Alexander Viro, Christian Brauner, Jan Kara,
Chuck Lever, Amir Goldstein, Alexander Aring
Cc: linux-fsdevel, linux-nfs, linux-kernel
On Mon, 2024-05-20 at 17:35 -0400, Aleksa Sarai wrote:
> Now that we have stabilised the unique 64-bit mount ID interface in
> statx, we can now provide a race-free way for name_to_handle_at(2) to
> provide a file handle and corresponding mount without needing to worry
> about racing with /proc/mountinfo parsing.
>
> As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> that doesn't make sense for name_to_handle_at(2).
>
> Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> ---
> fs/fhandle.c | 27 +++++++++++++++++++--------
> include/uapi/linux/fcntl.h | 2 ++
> 2 files changed, 21 insertions(+), 8 deletions(-)
>
> diff --git a/fs/fhandle.c b/fs/fhandle.c
> index 8a7f86c2139a..6bc7ffccff8c 100644
> --- a/fs/fhandle.c
> +++ b/fs/fhandle.c
> @@ -16,7 +16,8 @@
>
> static long do_sys_name_to_handle(const struct path *path,
> struct file_handle __user *ufh,
> - int __user *mnt_id, int fh_flags)
> + void __user *mnt_id, bool unique_mntid,
> + int fh_flags)
> {
> long retval;
> struct file_handle f_handle;
> @@ -69,10 +70,16 @@ static long do_sys_name_to_handle(const struct path *path,
> } else
> retval = 0;
> /* copy the mount id */
> - if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
> - copy_to_user(ufh, handle,
> - struct_size(handle, f_handle, handle_bytes)))
> - retval = -EFAULT;
> + if (unique_mntid)
> + retval = put_user(real_mount(path->mnt)->mnt_id_unique,
> + (u64 __user *) mnt_id);
> + else
> + retval = put_user(real_mount(path->mnt)->mnt_id,
> + (int __user *) mnt_id);
> + /* copy the handle */
> + if (!retval)
> + retval = copy_to_user(ufh, handle,
> + struct_size(handle, f_handle, handle_bytes));
> kfree(handle);
> return retval;
> }
> @@ -83,6 +90,7 @@ static long do_sys_name_to_handle(const struct path *path,
> * @name: name that should be converted to handle.
> * @handle: resulting file handle
> * @mnt_id: mount id of the file system containing the file
> + * (u64 if AT_HANDLE_UNIQUE_MNT_ID, otherwise int)
> * @flag: flag value to indicate whether to follow symlink or not
> * and whether a decodable file handle is required.
> *
> @@ -92,7 +100,7 @@ static long do_sys_name_to_handle(const struct path *path,
> * value required.
> */
> SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
> - struct file_handle __user *, handle, int __user *, mnt_id,
> + struct file_handle __user *, handle, void __user *, mnt_id,
>
Changing the syscall signature like this is rather nasty. The new flag
seems like it should safely gate the difference, but I still have some
concerns about misuse and people passing in too small a buffer for the
mnt_id.
> int, flag)
> {
> struct path path;
> @@ -100,7 +108,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
> int fh_flags;
> int err;
>
> - if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID))
> + if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
> + AT_HANDLE_UNIQUE_MNT_ID))
> return -EINVAL;
>
> lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
> @@ -109,7 +118,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
> lookup_flags |= LOOKUP_EMPTY;
> err = user_path_at(dfd, name, lookup_flags, &path);
> if (!err) {
> - err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags);
> + err = do_sys_name_to_handle(&path, handle, mnt_id,
> + flag & AT_HANDLE_UNIQUE_MNT_ID,
> + fh_flags);
> path_put(&path);
> }
> return err;
> diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
> index c0bcc185fa48..fda970f92fba 100644
> --- a/include/uapi/linux/fcntl.h
> +++ b/include/uapi/linux/fcntl.h
> @@ -118,6 +118,8 @@
> #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
> compare object identity and may not
> be usable to open_by_handle_at(2) */
> +#define AT_HANDLE_UNIQUE_MNT_ID AT_STATX_FORCE_SYNC /* returned mount id is
> + the u64 unique mount id */
> #if defined(__KERNEL__)
> #define AT_GETATTR_NOSEC 0x80000000
> #endif
>
> ---
> base-commit: 584bbf439d0fa83d728ec49f3a38c581bdc828b4
> change-id: 20240515-exportfs-u64-mount-id-9ebb5c58b53c
>
> Best regards,
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-20 21:53 ` Jeff Layton
@ 2024-05-20 22:27 ` Aleksa Sarai
2024-05-21 5:04 ` Amir Goldstein
0 siblings, 1 reply; 21+ messages in thread
From: Aleksa Sarai @ 2024-05-20 22:27 UTC (permalink / raw)
To: Jeff Layton
Cc: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
Amir Goldstein, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel
[-- Attachment #1: Type: text/plain, Size: 5079 bytes --]
On 2024-05-20, Jeff Layton <jlayton@kernel.org> wrote:
> On Mon, 2024-05-20 at 17:35 -0400, Aleksa Sarai wrote:
> > Now that we have stabilised the unique 64-bit mount ID interface in
> > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > provide a file handle and corresponding mount without needing to worry
> > about racing with /proc/mountinfo parsing.
> >
> > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > that doesn't make sense for name_to_handle_at(2).
> >
> > Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> > ---
> > fs/fhandle.c | 27 +++++++++++++++++++--------
> > include/uapi/linux/fcntl.h | 2 ++
> > 2 files changed, 21 insertions(+), 8 deletions(-)
> >
> > diff --git a/fs/fhandle.c b/fs/fhandle.c
> > index 8a7f86c2139a..6bc7ffccff8c 100644
> > --- a/fs/fhandle.c
> > +++ b/fs/fhandle.c
> > @@ -16,7 +16,8 @@
> >
> > static long do_sys_name_to_handle(const struct path *path,
> > struct file_handle __user *ufh,
> > - int __user *mnt_id, int fh_flags)
> > + void __user *mnt_id, bool unique_mntid,
> > + int fh_flags)
> > {
> > long retval;
> > struct file_handle f_handle;
> > @@ -69,10 +70,16 @@ static long do_sys_name_to_handle(const struct path *path,
> > } else
> > retval = 0;
> > /* copy the mount id */
> > - if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
> > - copy_to_user(ufh, handle,
> > - struct_size(handle, f_handle, handle_bytes)))
> > - retval = -EFAULT;
> > + if (unique_mntid)
> > + retval = put_user(real_mount(path->mnt)->mnt_id_unique,
> > + (u64 __user *) mnt_id);
> > + else
> > + retval = put_user(real_mount(path->mnt)->mnt_id,
> > + (int __user *) mnt_id);
> > + /* copy the handle */
> > + if (!retval)
> > + retval = copy_to_user(ufh, handle,
> > + struct_size(handle, f_handle, handle_bytes));
> > kfree(handle);
> > return retval;
> > }
> > @@ -83,6 +90,7 @@ static long do_sys_name_to_handle(const struct path *path,
> > * @name: name that should be converted to handle.
> > * @handle: resulting file handle
> > * @mnt_id: mount id of the file system containing the file
> > + * (u64 if AT_HANDLE_UNIQUE_MNT_ID, otherwise int)
> > * @flag: flag value to indicate whether to follow symlink or not
> > * and whether a decodable file handle is required.
> > *
> > @@ -92,7 +100,7 @@ static long do_sys_name_to_handle(const struct path *path,
> > * value required.
> > */
> > SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
> > - struct file_handle __user *, handle, int __user *, mnt_id,
> > + struct file_handle __user *, handle, void __user *, mnt_id,
> >
>
> Changing the syscall signature like this is rather nasty. The new flag
> seems like it should safely gate the difference, but I still have some
> concerns about misuse and people passing in too small a buffer for the
> mnt_id.
Yeah, it's a little ugly, but an name_to_handle_at2 feels like overkill
for such a minor change. I'm also not sure there's a huge risk of users
accidentally passing AT_HANDLE_UNIQUE_MNT_ID with an (int *).
> > int, flag)
> > {
> > struct path path;
> > @@ -100,7 +108,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
> > int fh_flags;
> > int err;
> >
> > - if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID))
> > + if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
> > + AT_HANDLE_UNIQUE_MNT_ID))
> > return -EINVAL;
> >
> > lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
> > @@ -109,7 +118,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
> > lookup_flags |= LOOKUP_EMPTY;
> > err = user_path_at(dfd, name, lookup_flags, &path);
> > if (!err) {
> > - err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags);
> > + err = do_sys_name_to_handle(&path, handle, mnt_id,
> > + flag & AT_HANDLE_UNIQUE_MNT_ID,
> > + fh_flags);
> > path_put(&path);
> > }
> > return err;
> > diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
> > index c0bcc185fa48..fda970f92fba 100644
> > --- a/include/uapi/linux/fcntl.h
> > +++ b/include/uapi/linux/fcntl.h
> > @@ -118,6 +118,8 @@
> > #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
> > compare object identity and may not
> > be usable to open_by_handle_at(2) */
> > +#define AT_HANDLE_UNIQUE_MNT_ID AT_STATX_FORCE_SYNC /* returned mount id is
> > + the u64 unique mount id */
> > #if defined(__KERNEL__)
> > #define AT_GETATTR_NOSEC 0x80000000
> > #endif
> >
> > ---
> > base-commit: 584bbf439d0fa83d728ec49f3a38c581bdc828b4
> > change-id: 20240515-exportfs-u64-mount-id-9ebb5c58b53c
> >
> > Best regards,
>
> --
> Jeff Layton <jlayton@kernel.org>
>
--
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH
<https://www.cyphar.com/>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-20 22:27 ` Aleksa Sarai
@ 2024-05-21 5:04 ` Amir Goldstein
2024-05-21 10:42 ` Aleksa Sarai
0 siblings, 1 reply; 21+ messages in thread
From: Amir Goldstein @ 2024-05-21 5:04 UTC (permalink / raw)
To: Aleksa Sarai
Cc: Jeff Layton, Alexander Viro, Christian Brauner, Jan Kara,
Chuck Lever, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel, Christian Göttsche
On Tue, May 21, 2024 at 1:28 AM Aleksa Sarai <cyphar@cyphar.com> wrote:
>
> On 2024-05-20, Jeff Layton <jlayton@kernel.org> wrote:
> > On Mon, 2024-05-20 at 17:35 -0400, Aleksa Sarai wrote:
> > > Now that we have stabilised the unique 64-bit mount ID interface in
> > > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > > provide a file handle and corresponding mount without needing to worry
> > > about racing with /proc/mountinfo parsing.
Both statx() and name_to_handle_at() support AT_EMPTY_PATH, so
there is a race-free way to get a file handle and unique mount id
for statmount().
Why do you mean /proc/mountinfo parsing?
> > >
> > > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > > that doesn't make sense for name_to_handle_at(2).
Christian is probably regretting merging AT_HANDLE_FID now ;-)
Seriously, I would rearrange the AT_* flags namespace this way to
explicitly declare the overloaded per-syscall AT_* flags and possibly
prepare for the upcoming setxattrat(2) syscall [1].
[1] https://lore.kernel.org/linux-fsdevel/20240426162042.191916-1-cgoettsche@seltendoof.de/
The reason I would avoid overloading the AT_STATX_* flags is that
they describe a generic behavior that could potentially be relevant to
other syscalls in the future, e.g.:
renameat2(..., AT_RENAME_TEMPFILE | AT_FORCE_SYNC);
But then again, I don't understand why you need to extend name_to_handle_at()
at all for your purpose...
Thanks,
Amir.
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
[...]
+
+#define AT_PRIVATE_FLAGS 0x2ff /* Per-syscall flags mask. */
+
+/* Common flags for *at() syscalls */
#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
-#define AT_EACCESS 0x200 /* Test access permitted for
- effective IDs, not real IDs. */
-#define AT_REMOVEDIR 0x200 /* Remove directory instead of
- unlinking file. */
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal
automount traversal */
#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
+/* Flags for statx(2) */
#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation
required from statx() */
#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to
be sync'd with the server */
[...]
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
-/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
-#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
+/* Flags for name_to_handle_at(2) */
+#define AT_HANDLE_FID 0x200 /* file handle is needed to
compare object identity and may not
be usable to open_by_handle_at(2) */
+/* Flags for faccessat(2) */
+#define AT_EACCESS 0x200 /* Test access permitted for
+ effective IDs, not real IDs. */
+/* Flags for unlinkat(2) */
+#define AT_REMOVEDIR 0x200 /* Remove directory instead of
+ unlinking file. */
+
+/* Flags for renameat2(2) (should match legacy RENAME_* flags) */
+#define AT_RENAME_NOREPLACE 0x001 /* Don't overwrite target */
+#define AT_RENAME_EXCHANGE 0x002 /* Exchange source and dest */
+#define AT_RENAME_WHITEOUT 0x004 /* Whiteout source */
+#define AT_RENAME_TEMPFILE 0x008 /* Source file is O_TMPFILE */
+
+/* Flags for setxattrat(2) (should match legacy XATTR_* flags) */
+#define AT_XATTR_CREATE 0x001 /* set value, fail if
attr already exists */
+#define AT_XATTR_REPLACE 0x002 /* set value, fail if attr
does not exist */
+
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-21 5:04 ` Amir Goldstein
@ 2024-05-21 10:42 ` Aleksa Sarai
0 siblings, 0 replies; 21+ messages in thread
From: Aleksa Sarai @ 2024-05-21 10:42 UTC (permalink / raw)
To: Amir Goldstein
Cc: Jeff Layton, Alexander Viro, Christian Brauner, Jan Kara,
Chuck Lever, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel, Christian Göttsche
[-- Attachment #1: Type: text/plain, Size: 5291 bytes --]
On 2024-05-21, Amir Goldstein <amir73il@gmail.com> wrote:
> On Tue, May 21, 2024 at 1:28 AM Aleksa Sarai <cyphar@cyphar.com> wrote:
> >
> > On 2024-05-20, Jeff Layton <jlayton@kernel.org> wrote:
> > > On Mon, 2024-05-20 at 17:35 -0400, Aleksa Sarai wrote:
> > > > Now that we have stabilised the unique 64-bit mount ID interface in
> > > > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > > > provide a file handle and corresponding mount without needing to worry
> > > > about racing with /proc/mountinfo parsing.
>
> Both statx() and name_to_handle_at() support AT_EMPTY_PATH, so
> there is a race-free way to get a file handle and unique mount id
> for statmount().
Doing it that way would require doing an open and statx for every path
you want to get a filehandle for, tripling the number of syscalls you
need to do. This is related to the syscall overhead issue Lennart talked
about last week at LSF (though for his usecase we would need to add a
hashed filehandle in statx).
> Why do you mean /proc/mountinfo parsing?
The man page for name_to_handle_at(2) talks about needing to parse
/proc/mountinfo as well as the possible races you can hit.
> > > > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > > > that doesn't make sense for name_to_handle_at(2).
>
> Christian is probably regretting merging AT_HANDLE_FID now ;-)
>
> Seriously, I would rearrange the AT_* flags namespace this way to
> explicitly declare the overloaded per-syscall AT_* flags and possibly
> prepare for the upcoming setxattrat(2) syscall [1].
I'm not sure that unifying the flag namespace is a good idea -- while it
would be nicer, burning a flag bit for an extension will be more
expensive because we would only have 32 bits for every possible
extension we ever plan to have.
FWIW, I think that statx should've had their own flag namespace like
move_mount and renameat2.
> [1] https://lore.kernel.org/linux-fsdevel/20240426162042.191916-1-cgoettsche@seltendoof.de/
>
> The reason I would avoid overloading the AT_STATX_* flags is that
> they describe a generic behavior that could potentially be relevant to
> other syscalls in the future, e.g.:
> renameat2(..., AT_RENAME_TEMPFILE | AT_FORCE_SYNC);
Yeah, you might be right that the sync-related flags aren't the right
ones to overload here.
> But then again, I don't understand why you need to extend name_to_handle_at()
> at all for your purpose...
>
> Thanks,
> Amir.
>
> --- a/include/uapi/linux/fcntl.h
> +++ b/include/uapi/linux/fcntl.h
> [...]
> +
> +#define AT_PRIVATE_FLAGS 0x2ff /* Per-syscall flags mask. */
> +
> +/* Common flags for *at() syscalls */
> #define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
> -#define AT_EACCESS 0x200 /* Test access permitted for
> - effective IDs, not real IDs. */
> -#define AT_REMOVEDIR 0x200 /* Remove directory instead of
> - unlinking file. */
> #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
> #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal
> automount traversal */
> #define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
>
> +/* Flags for statx(2) */
> #define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation
> required from statx() */
> #define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
> #define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to
> be sync'd with the server */
> [...]
>
> #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
>
> -/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
> -#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
> +/* Flags for name_to_handle_at(2) */
> +#define AT_HANDLE_FID 0x200 /* file handle is needed to
> compare object identity and may not
> be usable to open_by_handle_at(2) */
> +/* Flags for faccessat(2) */
> +#define AT_EACCESS 0x200 /* Test access permitted for
> + effective IDs, not real IDs. */
> +/* Flags for unlinkat(2) */
> +#define AT_REMOVEDIR 0x200 /* Remove directory instead of
> + unlinking file. */
> +
> +/* Flags for renameat2(2) (should match legacy RENAME_* flags) */
> +#define AT_RENAME_NOREPLACE 0x001 /* Don't overwrite target */
> +#define AT_RENAME_EXCHANGE 0x002 /* Exchange source and dest */
> +#define AT_RENAME_WHITEOUT 0x004 /* Whiteout source */
> +#define AT_RENAME_TEMPFILE 0x008 /* Source file is O_TMPFILE */
> +
> +/* Flags for setxattrat(2) (should match legacy XATTR_* flags) */
> +#define AT_XATTR_CREATE 0x001 /* set value, fail if
> attr already exists */
> +#define AT_XATTR_REPLACE 0x002 /* set value, fail if attr
> does not exist */
> +
--
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH
<https://www.cyphar.com/>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-20 21:35 [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2) Aleksa Sarai
2024-05-20 21:53 ` Jeff Layton
@ 2024-05-21 2:18 ` kernel test robot
2024-05-21 3:02 ` kernel test robot
` (3 subsequent siblings)
5 siblings, 0 replies; 21+ messages in thread
From: kernel test robot @ 2024-05-21 2:18 UTC (permalink / raw)
To: Aleksa Sarai; +Cc: oe-kbuild-all
Hi Aleksa,
[This is a private test report for your RFC patch.]
kernel test robot noticed the following build errors:
[auto build test ERROR on 584bbf439d0fa83d728ec49f3a38c581bdc828b4]
url: https://github.com/intel-lab-lkp/linux/commits/Aleksa-Sarai/fhandle-expose-u64-mount-id-to-name_to_handle_at-2/20240521-053815
base: 584bbf439d0fa83d728ec49f3a38c581bdc828b4
patch link: https://lore.kernel.org/r/20240520-exportfs-u64-mount-id-v1-1-f55fd9215b8e%40cyphar.com
patch subject: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
config: openrisc-allnoconfig (https://download.01.org/0day-ci/archive/20240521/202405211049.BNpDlNlU-lkp@intel.com/config)
compiler: or1k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240521/202405211049.BNpDlNlU-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405211049.BNpDlNlU-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from fs/fhandle.c:2:
>> include/linux/syscalls.h:248:25: error: conflicting types for 'sys_name_to_handle_at'; have 'long int(int, const char *, struct file_handle *, void *, int)'
248 | asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
| ^~~
include/linux/syscalls.h:234:9: note: in expansion of macro '__SYSCALL_DEFINEx'
234 | __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
| ^~~~~~~~~~~~~~~~~
include/linux/syscalls.h:227:36: note: in expansion of macro 'SYSCALL_DEFINEx'
227 | #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
| ^~~~~~~~~~~~~~~
fs/fhandle.c:102:1: note: in expansion of macro 'SYSCALL_DEFINE5'
102 | SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
| ^~~~~~~~~~~~~~~
include/linux/syscalls.h:864:17: note: previous declaration of 'sys_name_to_handle_at' with type 'long int(int, const char *, struct file_handle *, int *, int)'
864 | asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
| ^~~~~~~~~~~~~~~~~~~~~
vim +248 include/linux/syscalls.h
1bd21c6c21e848 Dominik Brodowski 2018-04-05 237
e145242ea0df6b Dominik Brodowski 2018-04-09 238 /*
e145242ea0df6b Dominik Brodowski 2018-04-09 239 * The asmlinkage stub is aliased to a function named __se_sys_*() which
e145242ea0df6b Dominik Brodowski 2018-04-09 240 * sign-extends 32-bit ints to longs whenever needed. The actual work is
e145242ea0df6b Dominik Brodowski 2018-04-09 241 * done within __do_sys_*().
e145242ea0df6b Dominik Brodowski 2018-04-09 242 */
1bd21c6c21e848 Dominik Brodowski 2018-04-05 243 #ifndef __SYSCALL_DEFINEx
bed1ffca022cc8 Frederic Weisbecker 2009-03-13 244 #define __SYSCALL_DEFINEx(x, name, ...) \
bee20031772af3 Arnd Bergmann 2018-06-19 245 __diag_push(); \
bee20031772af3 Arnd Bergmann 2018-06-19 246 __diag_ignore(GCC, 8, "-Wattribute-alias", \
bee20031772af3 Arnd Bergmann 2018-06-19 247 "Type aliasing is used to sanitize syscall arguments");\
83460ec8dcac14 Andi Kleen 2013-11-12 @248 asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
e145242ea0df6b Dominik Brodowski 2018-04-09 249 __attribute__((alias(__stringify(__se_sys##name)))); \
c9a211951c7c79 Howard McLauchlan 2018-03-21 250 ALLOW_ERROR_INJECTION(sys##name, ERRNO); \
e145242ea0df6b Dominik Brodowski 2018-04-09 251 static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
e145242ea0df6b Dominik Brodowski 2018-04-09 252 asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
e145242ea0df6b Dominik Brodowski 2018-04-09 253 asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
1a94bc34768e46 Heiko Carstens 2009-01-14 254 { \
e145242ea0df6b Dominik Brodowski 2018-04-09 255 long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\
07fe6e00f6cca6 Al Viro 2013-01-21 256 __MAP(x,__SC_TEST,__VA_ARGS__); \
2cf0966683430b Al Viro 2013-01-21 257 __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
2cf0966683430b Al Viro 2013-01-21 258 return ret; \
1a94bc34768e46 Heiko Carstens 2009-01-14 259 } \
bee20031772af3 Arnd Bergmann 2018-06-19 260 __diag_pop(); \
e145242ea0df6b Dominik Brodowski 2018-04-09 261 static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
1bd21c6c21e848 Dominik Brodowski 2018-04-05 262 #endif /* __SYSCALL_DEFINEx */
1a94bc34768e46 Heiko Carstens 2009-01-14 263
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-20 21:35 [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2) Aleksa Sarai
2024-05-20 21:53 ` Jeff Layton
2024-05-21 2:18 ` kernel test robot
@ 2024-05-21 3:02 ` kernel test robot
2024-05-21 13:45 ` Christian Brauner
` (2 subsequent siblings)
5 siblings, 0 replies; 21+ messages in thread
From: kernel test robot @ 2024-05-21 3:02 UTC (permalink / raw)
To: Aleksa Sarai; +Cc: llvm, oe-kbuild-all
Hi Aleksa,
[This is a private test report for your RFC patch.]
kernel test robot noticed the following build errors:
[auto build test ERROR on 584bbf439d0fa83d728ec49f3a38c581bdc828b4]
url: https://github.com/intel-lab-lkp/linux/commits/Aleksa-Sarai/fhandle-expose-u64-mount-id-to-name_to_handle_at-2/20240521-053815
base: 584bbf439d0fa83d728ec49f3a38c581bdc828b4
patch link: https://lore.kernel.org/r/20240520-exportfs-u64-mount-id-v1-1-f55fd9215b8e%40cyphar.com
patch subject: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
config: arm-allnoconfig (https://download.01.org/0day-ci/archive/20240521/202405211019.3ibjIxdf-lkp@intel.com/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project fa9b1be45088dce1e4b602d451f118128b94237b)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240521/202405211019.3ibjIxdf-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405211019.3ibjIxdf-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from fs/fhandle.c:2:
In file included from include/linux/syscalls.h:93:
In file included from include/trace/syscall.h:7:
In file included from include/linux/trace_events.h:6:
In file included from include/linux/ring_buffer.h:5:
In file included from include/linux/mm.h:2210:
include/linux/vmstat.h:522:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
522 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
>> fs/fhandle.c:102:1: error: conflicting types for 'sys_name_to_handle_at'
102 | SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
| ^
include/linux/syscalls.h:227:36: note: expanded from macro 'SYSCALL_DEFINE5'
227 | #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
| ^
include/linux/syscalls.h:234:2: note: expanded from macro 'SYSCALL_DEFINEx'
234 | __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
| ^
include/linux/syscalls.h:248:18: note: expanded from macro '__SYSCALL_DEFINEx'
248 | asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
| ^
<scratch space>:17:1: note: expanded from here
17 | sys_name_to_handle_at
| ^
include/linux/syscalls.h:864:17: note: previous declaration is here
864 | asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
| ^
1 warning and 1 error generated.
vim +/sys_name_to_handle_at +102 fs/fhandle.c
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 86
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 87 /**
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 88 * sys_name_to_handle_at: convert name to handle
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 89 * @dfd: directory relative to which name is interpreted if not absolute
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 90 * @name: name that should be converted to handle.
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 91 * @handle: resulting file handle
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 92 * @mnt_id: mount id of the file system containing the file
1702d25fd14170 Aleksa Sarai 2024-05-20 93 * (u64 if AT_HANDLE_UNIQUE_MNT_ID, otherwise int)
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 94 * @flag: flag value to indicate whether to follow symlink or not
96b2b072ee62be Amir Goldstein 2023-05-02 95 * and whether a decodable file handle is required.
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 96 *
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 97 * @handle->handle_size indicate the space available to store the
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 98 * variable part of the file handle in bytes. If there is not
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 99 * enough space, the field is updated to return the minimum
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 100 * value required.
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 101 */
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 @102 SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
1702d25fd14170 Aleksa Sarai 2024-05-20 103 struct file_handle __user *, handle, void __user *, mnt_id,
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 104 int, flag)
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 105 {
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 106 struct path path;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 107 int lookup_flags;
96b2b072ee62be Amir Goldstein 2023-05-02 108 int fh_flags;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 109 int err;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 110
1702d25fd14170 Aleksa Sarai 2024-05-20 111 if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
1702d25fd14170 Aleksa Sarai 2024-05-20 112 AT_HANDLE_UNIQUE_MNT_ID))
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 113 return -EINVAL;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 114
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 115 lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
96b2b072ee62be Amir Goldstein 2023-05-02 116 fh_flags = (flag & AT_HANDLE_FID) ? EXPORT_FH_FID : 0;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 117 if (flag & AT_EMPTY_PATH)
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 118 lookup_flags |= LOOKUP_EMPTY;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 119 err = user_path_at(dfd, name, lookup_flags, &path);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 120 if (!err) {
1702d25fd14170 Aleksa Sarai 2024-05-20 121 err = do_sys_name_to_handle(&path, handle, mnt_id,
1702d25fd14170 Aleksa Sarai 2024-05-20 122 flag & AT_HANDLE_UNIQUE_MNT_ID,
1702d25fd14170 Aleksa Sarai 2024-05-20 123 fh_flags);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 124 path_put(&path);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 125 }
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 126 return err;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 127 }
becfd1f3754479 Aneesh Kumar K.V 2011-01-29 128
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-20 21:35 [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2) Aleksa Sarai
` (2 preceding siblings ...)
2024-05-21 3:02 ` kernel test robot
@ 2024-05-21 13:45 ` Christian Brauner
2024-05-21 14:11 ` Christian Brauner
2024-05-23 15:52 ` Aleksa Sarai
2024-05-23 14:47 ` Dan Carpenter
2024-05-26 8:55 ` Christoph Hellwig
5 siblings, 2 replies; 21+ messages in thread
From: Christian Brauner @ 2024-05-21 13:45 UTC (permalink / raw)
To: Aleksa Sarai
Cc: Alexander Viro, Jan Kara, Chuck Lever, Jeff Layton,
Amir Goldstein, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel
On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> Now that we have stabilised the unique 64-bit mount ID interface in
> statx, we can now provide a race-free way for name_to_handle_at(2) to
> provide a file handle and corresponding mount without needing to worry
> about racing with /proc/mountinfo parsing.
>
> As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> that doesn't make sense for name_to_handle_at(2).
>
> Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> ---
So I think overall this is probably fine (famous last words). If it's
just about being able to retrieve the new mount id without having to
take the hit of another statx system call it's indeed a bit much to
add a revised system call for this. Althoug I did say earlier that I
wouldn't rule that out.
But if we'd that then it'll be a long discussion on the form of the new
system call and the information it exposes.
For example, I lack the grey hair needed to understand why
name_to_handle_at() returns a mount id at all. The pitch in commit
990d6c2d7aee ("vfs: Add name to file handle conversion support") is that
the (old) mount id can be used to "lookup file system specific
information [...] in /proc/<pid>/mountinfo".
Granted, that's doable but it'll mean a lot of careful checking to avoid
races for mount id recycling because they're not even allocated
cyclically. With lots of containers it becomes even more of an issue. So
it's doubtful whether exposing the mount id through name_to_handle_at()
would be something that we'd still do.
So really, if this is just about a use-case where you want to spare the
additional system call for statx() and you need the mnt_id then
overloading is probably ok.
But it remains an unpleasant thing to look at.
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-21 13:45 ` Christian Brauner
@ 2024-05-21 14:11 ` Christian Brauner
2024-05-21 14:27 ` Jeff Layton
2024-05-23 15:52 ` Aleksa Sarai
1 sibling, 1 reply; 21+ messages in thread
From: Christian Brauner @ 2024-05-21 14:11 UTC (permalink / raw)
To: Aleksa Sarai
Cc: Alexander Viro, Jan Kara, Chuck Lever, Jeff Layton,
Amir Goldstein, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel
On Tue, May 21, 2024 at 03:46:06PM +0200, Christian Brauner wrote:
> On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> > Now that we have stabilised the unique 64-bit mount ID interface in
> > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > provide a file handle and corresponding mount without needing to worry
> > about racing with /proc/mountinfo parsing.
> >
> > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > that doesn't make sense for name_to_handle_at(2).
> >
> > Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> > ---
>
> So I think overall this is probably fine (famous last words). If it's
> just about being able to retrieve the new mount id without having to
> take the hit of another statx system call it's indeed a bit much to
> add a revised system call for this. Althoug I did say earlier that I
> wouldn't rule that out.
>
> But if we'd that then it'll be a long discussion on the form of the new
> system call and the information it exposes.
>
> For example, I lack the grey hair needed to understand why
> name_to_handle_at() returns a mount id at all. The pitch in commit
> 990d6c2d7aee ("vfs: Add name to file handle conversion support") is that
> the (old) mount id can be used to "lookup file system specific
> information [...] in /proc/<pid>/mountinfo".
>
> Granted, that's doable but it'll mean a lot of careful checking to avoid
> races for mount id recycling because they're not even allocated
> cyclically. With lots of containers it becomes even more of an issue. So
> it's doubtful whether exposing the mount id through name_to_handle_at()
> would be something that we'd still do.
>
> So really, if this is just about a use-case where you want to spare the
> additional system call for statx() and you need the mnt_id then
> overloading is probably ok.
>
> But it remains an unpleasant thing to look at.
And I'd like an ok from Jeff and Amir if we're going to try this. :)
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-21 14:11 ` Christian Brauner
@ 2024-05-21 14:27 ` Jeff Layton
2024-05-21 16:42 ` Amir Goldstein
0 siblings, 1 reply; 21+ messages in thread
From: Jeff Layton @ 2024-05-21 14:27 UTC (permalink / raw)
To: Christian Brauner, Aleksa Sarai
Cc: Alexander Viro, Jan Kara, Chuck Lever, Amir Goldstein,
Alexander Aring, linux-fsdevel, linux-nfs, linux-kernel
On Tue, 2024-05-21 at 16:11 +0200, Christian Brauner wrote:
> On Tue, May 21, 2024 at 03:46:06PM +0200, Christian Brauner wrote:
> > On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> > > Now that we have stabilised the unique 64-bit mount ID interface in
> > > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > > provide a file handle and corresponding mount without needing to worry
> > > about racing with /proc/mountinfo parsing.
> > >
> > > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > > that doesn't make sense for name_to_handle_at(2).
> > >
> > > Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> > > ---
> >
> > So I think overall this is probably fine (famous last words). If it's
> > just about being able to retrieve the new mount id without having to
> > take the hit of another statx system call it's indeed a bit much to
> > add a revised system call for this. Althoug I did say earlier that I
> > wouldn't rule that out.
> >
> > But if we'd that then it'll be a long discussion on the form of the new
> > system call and the information it exposes.
> >
> > For example, I lack the grey hair needed to understand why
> > name_to_handle_at() returns a mount id at all. The pitch in commit
> > 990d6c2d7aee ("vfs: Add name to file handle conversion support") is that
> > the (old) mount id can be used to "lookup file system specific
> > information [...] in /proc/<pid>/mountinfo".
> >
> > Granted, that's doable but it'll mean a lot of careful checking to avoid
> > races for mount id recycling because they're not even allocated
> > cyclically. With lots of containers it becomes even more of an issue. So
> > it's doubtful whether exposing the mount id through name_to_handle_at()
> > would be something that we'd still do.
> >
> > So really, if this is just about a use-case where you want to spare the
> > additional system call for statx() and you need the mnt_id then
> > overloading is probably ok.
> >
> > But it remains an unpleasant thing to look at.
>
> And I'd like an ok from Jeff and Amir if we're going to try this. :)
I don't have strong feelings about it other than "it looks sort of
ugly", so I'm OK with doing this.
I suspect we will eventually need name_to_handle_at2, or something
similar, as it seems like we're starting to grow some new use-cases for
filehandles, and hitting the limits of the old syscall. I don't have a
good feel for what that should look like though, so I'm happy to put
that off for a while.
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-21 14:27 ` Jeff Layton
@ 2024-05-21 16:42 ` Amir Goldstein
2024-05-23 19:16 ` Aleksa Sarai
0 siblings, 1 reply; 21+ messages in thread
From: Amir Goldstein @ 2024-05-21 16:42 UTC (permalink / raw)
To: Jeff Layton
Cc: Christian Brauner, Aleksa Sarai, Alexander Viro, Jan Kara,
Chuck Lever, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel
On Tue, May 21, 2024 at 5:27 PM Jeff Layton <jlayton@kernel.org> wrote:
>
> On Tue, 2024-05-21 at 16:11 +0200, Christian Brauner wrote:
> > On Tue, May 21, 2024 at 03:46:06PM +0200, Christian Brauner wrote:
> > > On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> > > > Now that we have stabilised the unique 64-bit mount ID interface in
> > > > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > > > provide a file handle and corresponding mount without needing to worry
> > > > about racing with /proc/mountinfo parsing.
> > > >
> > > > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > > > that doesn't make sense for name_to_handle_at(2).
> > > >
> > > > Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> > > > ---
> > >
> > > So I think overall this is probably fine (famous last words). If it's
> > > just about being able to retrieve the new mount id without having to
> > > take the hit of another statx system call it's indeed a bit much to
> > > add a revised system call for this. Althoug I did say earlier that I
> > > wouldn't rule that out.
> > >
> > > But if we'd that then it'll be a long discussion on the form of the new
> > > system call and the information it exposes.
> > >
> > > For example, I lack the grey hair needed to understand why
> > > name_to_handle_at() returns a mount id at all. The pitch in commit
> > > 990d6c2d7aee ("vfs: Add name to file handle conversion support") is that
> > > the (old) mount id can be used to "lookup file system specific
> > > information [...] in /proc/<pid>/mountinfo".
> > >
> > > Granted, that's doable but it'll mean a lot of careful checking to avoid
> > > races for mount id recycling because they're not even allocated
> > > cyclically. With lots of containers it becomes even more of an issue. So
> > > it's doubtful whether exposing the mount id through name_to_handle_at()
> > > would be something that we'd still do.
> > >
> > > So really, if this is just about a use-case where you want to spare the
> > > additional system call for statx() and you need the mnt_id then
> > > overloading is probably ok.
> > >
> > > But it remains an unpleasant thing to look at.
> >
> > And I'd like an ok from Jeff and Amir if we're going to try this. :)
>
> I don't have strong feelings about it other than "it looks sort of
> ugly", so I'm OK with doing this.
>
> I suspect we will eventually need name_to_handle_at2, or something
> similar, as it seems like we're starting to grow some new use-cases for
> filehandles, and hitting the limits of the old syscall. I don't have a
> good feel for what that should look like though, so I'm happy to put
> that off for a while.
I'm ok with it, but we cannot possibly allow it without any bikeshedding...
Please call it AT_HANDLE_MNT_ID_UNIQUE to align with
STATX_MNT_ID_UNIQUE
and as I wrote, I do not like overloading the AT_*_SYNC flags
and as there is no other obvious candidate to overload, so
I think that it is best to at least declare in a comment that
/* 0x00ff flags are reserved for per-syscall flags */
and use one of those bits for AT_HANDLE_MNT_ID_UNIQUE.
It does not matter whether we decide to unify the AT_ flags
namespace with RENAME_ flags namespace or not.
The fact that there is a syscall named renameat2() with a flags
argument, means that someone is bound to pass in an AT_ flags
in this syscall sooner or later, so the least we can do is try to
delay the day that this will not result in EINVAL.
Thanks,
Amir.
P.S.: As I mentioned to Jeff in LSFMM, I have a patch in my tree
to add AT_HANDLE_CONNECTABLE which I have not yet
decided if it is upstream worthy.
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-21 16:42 ` Amir Goldstein
@ 2024-05-23 19:16 ` Aleksa Sarai
0 siblings, 0 replies; 21+ messages in thread
From: Aleksa Sarai @ 2024-05-23 19:16 UTC (permalink / raw)
To: Amir Goldstein
Cc: Jeff Layton, Christian Brauner, Alexander Viro, Jan Kara,
Chuck Lever, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel
[-- Attachment #1: Type: text/plain, Size: 4787 bytes --]
On 2024-05-21, Amir Goldstein <amir73il@gmail.com> wrote:
> On Tue, May 21, 2024 at 5:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> >
> > On Tue, 2024-05-21 at 16:11 +0200, Christian Brauner wrote:
> > > On Tue, May 21, 2024 at 03:46:06PM +0200, Christian Brauner wrote:
> > > > On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> > > > > Now that we have stabilised the unique 64-bit mount ID interface in
> > > > > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > > > > provide a file handle and corresponding mount without needing to worry
> > > > > about racing with /proc/mountinfo parsing.
> > > > >
> > > > > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > > > > that doesn't make sense for name_to_handle_at(2).
> > > > >
> > > > > Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> > > > > ---
> > > >
> > > > So I think overall this is probably fine (famous last words). If it's
> > > > just about being able to retrieve the new mount id without having to
> > > > take the hit of another statx system call it's indeed a bit much to
> > > > add a revised system call for this. Althoug I did say earlier that I
> > > > wouldn't rule that out.
> > > >
> > > > But if we'd that then it'll be a long discussion on the form of the new
> > > > system call and the information it exposes.
> > > >
> > > > For example, I lack the grey hair needed to understand why
> > > > name_to_handle_at() returns a mount id at all. The pitch in commit
> > > > 990d6c2d7aee ("vfs: Add name to file handle conversion support") is that
> > > > the (old) mount id can be used to "lookup file system specific
> > > > information [...] in /proc/<pid>/mountinfo".
> > > >
> > > > Granted, that's doable but it'll mean a lot of careful checking to avoid
> > > > races for mount id recycling because they're not even allocated
> > > > cyclically. With lots of containers it becomes even more of an issue. So
> > > > it's doubtful whether exposing the mount id through name_to_handle_at()
> > > > would be something that we'd still do.
> > > >
> > > > So really, if this is just about a use-case where you want to spare the
> > > > additional system call for statx() and you need the mnt_id then
> > > > overloading is probably ok.
> > > >
> > > > But it remains an unpleasant thing to look at.
> > >
> > > And I'd like an ok from Jeff and Amir if we're going to try this. :)
> >
> > I don't have strong feelings about it other than "it looks sort of
> > ugly", so I'm OK with doing this.
> >
> > I suspect we will eventually need name_to_handle_at2, or something
> > similar, as it seems like we're starting to grow some new use-cases for
> > filehandles, and hitting the limits of the old syscall. I don't have a
> > good feel for what that should look like though, so I'm happy to put
> > that off for a while.
>
> I'm ok with it, but we cannot possibly allow it without any bikeshedding...
>
> Please call it AT_HANDLE_MNT_ID_UNIQUE to align with
> STATX_MNT_ID_UNIQUE
>
> and as I wrote, I do not like overloading the AT_*_SYNC flags
> and as there is no other obvious candidate to overload, so
> I think that it is best to at least declare in a comment that
>
> /* 0x00ff flags are reserved for per-syscall flags */
>
> and use one of those bits for AT_HANDLE_MNT_ID_UNIQUE.
I can switch the flag to use 0x80, but given there are already
exceptions to that rule, it seems unlikely that this is going to be a
strong guarantee going forward. I will add a comment though.
Note that this will mean that we are planning to only have 15 remaining
generic AT_* flags.
> It does not matter whether we decide to unify the AT_ flags
> namespace with RENAME_ flags namespace or not.
>
> The fact that there is a syscall named renameat2() with a flags
> argument, means that someone is bound to pass in an AT_ flags
> in this syscall sooner or later, so the least we can do is try to
> delay the day that this will not result in EINVAL.
While there is a risk this could happen, in theory a user could also
incorrectly pass AT_* to open(). While ergonomics is important, I think
that most users generally read the docs when figuring out how to use
flags for syscalls (mainly because we don't have a unified flag
namespace for all syscalls) so I don't think this is a huge problem.
(But I'm sure I was part of making this problem worse with RESOLVE_*
flags.)
> Thanks,
> Amir.
>
> P.S.: As I mentioned to Jeff in LSFMM, I have a patch in my tree
> to add AT_HANDLE_CONNECTABLE which I have not yet
> decided if it is upstream worthy.
--
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH
<https://www.cyphar.com/>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-21 13:45 ` Christian Brauner
2024-05-21 14:11 ` Christian Brauner
@ 2024-05-23 15:52 ` Aleksa Sarai
2024-05-24 12:25 ` Christian Brauner
1 sibling, 1 reply; 21+ messages in thread
From: Aleksa Sarai @ 2024-05-23 15:52 UTC (permalink / raw)
To: Christian Brauner
Cc: Alexander Viro, Jan Kara, Chuck Lever, Jeff Layton,
Amir Goldstein, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel
[-- Attachment #1: Type: text/plain, Size: 2732 bytes --]
On 2024-05-21, Christian Brauner <brauner@kernel.org> wrote:
> On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> > Now that we have stabilised the unique 64-bit mount ID interface in
> > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > provide a file handle and corresponding mount without needing to worry
> > about racing with /proc/mountinfo parsing.
> >
> > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > that doesn't make sense for name_to_handle_at(2).
> >
> > Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> > ---
>
> So I think overall this is probably fine (famous last words). If it's
> just about being able to retrieve the new mount id without having to
> take the hit of another statx system call it's indeed a bit much to
> add a revised system call for this. Althoug I did say earlier that I
> wouldn't rule that out.
>
> But if we'd that then it'll be a long discussion on the form of the new
> system call and the information it exposes.
>
> For example, I lack the grey hair needed to understand why
> name_to_handle_at() returns a mount id at all. The pitch in commit
> 990d6c2d7aee ("vfs: Add name to file handle conversion support") is that
> the (old) mount id can be used to "lookup file system specific
> information [...] in /proc/<pid>/mountinfo".
The logic was presumably to allow you to know what mount the resolved
file handle came from. If you use AT_EMPTY_PATH this is not needed
because you could just fstatfs (and now statx(AT_EMPTY_PATH)), but if
you just give name_to_handle_at() almost any path, there is no race-free
way to make sure that you know which filesystem the file handle came
from.
I don't know if that could lead to security issues (I guess an attacker
could find a way to try to manipulate the file handle you get back, and
then try to trick you into operating on the wrong filesystem with
open_by_handle_at()) but it is definitely something you'd want to avoid.
> Granted, that's doable but it'll mean a lot of careful checking to avoid
> races for mount id recycling because they're not even allocated
> cyclically. With lots of containers it becomes even more of an issue. So
> it's doubtful whether exposing the mount id through name_to_handle_at()
> would be something that we'd still do.
>
> So really, if this is just about a use-case where you want to spare the
> additional system call for statx() and you need the mnt_id then
> overloading is probably ok.
>
> But it remains an unpleasant thing to look at.
>
Yeah, I agree it's ugly.
--
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH
<https://www.cyphar.com/>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-23 15:52 ` Aleksa Sarai
@ 2024-05-24 12:25 ` Christian Brauner
2024-05-24 15:58 ` Aleksa Sarai
2024-05-27 0:06 ` Dave Chinner
0 siblings, 2 replies; 21+ messages in thread
From: Christian Brauner @ 2024-05-24 12:25 UTC (permalink / raw)
To: Aleksa Sarai
Cc: Alexander Viro, Jan Kara, Chuck Lever, Jeff Layton,
Amir Goldstein, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel
On Thu, May 23, 2024 at 09:52:20AM -0600, Aleksa Sarai wrote:
> On 2024-05-21, Christian Brauner <brauner@kernel.org> wrote:
> > On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> > > Now that we have stabilised the unique 64-bit mount ID interface in
> > > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > > provide a file handle and corresponding mount without needing to worry
> > > about racing with /proc/mountinfo parsing.
> > >
> > > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > > that doesn't make sense for name_to_handle_at(2).
> > >
> > > Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> > > ---
> >
> > So I think overall this is probably fine (famous last words). If it's
> > just about being able to retrieve the new mount id without having to
> > take the hit of another statx system call it's indeed a bit much to
> > add a revised system call for this. Althoug I did say earlier that I
> > wouldn't rule that out.
> >
> > But if we'd that then it'll be a long discussion on the form of the new
> > system call and the information it exposes.
> >
> > For example, I lack the grey hair needed to understand why
> > name_to_handle_at() returns a mount id at all. The pitch in commit
> > 990d6c2d7aee ("vfs: Add name to file handle conversion support") is that
> > the (old) mount id can be used to "lookup file system specific
> > information [...] in /proc/<pid>/mountinfo".
>
> The logic was presumably to allow you to know what mount the resolved
> file handle came from. If you use AT_EMPTY_PATH this is not needed
> because you could just fstatfs (and now statx(AT_EMPTY_PATH)), but if
> you just give name_to_handle_at() almost any path, there is no race-free
> way to make sure that you know which filesystem the file handle came
> from.
>
> I don't know if that could lead to security issues (I guess an attacker
> could find a way to try to manipulate the file handle you get back, and
> then try to trick you into operating on the wrong filesystem with
> open_by_handle_at()) but it is definitely something you'd want to avoid.
So the following paragraphs are prefaced with: I'm not an expert on file
handle encoding and so might be totally wrong.
Afaiu, the uniqueness guarantee of the file handle mostly depends on:
(1) the filesystem
(2) the actual file handling encoding
Looking at file handle encoding to me it looks like it's fairly easy to
fake them in userspace (I guess that's ok if you think about them like a
path but with a weird permission model built around them.) for quite a
few filesystems.
For example, for anything that uses fs/libfs.c:generic_encode_ino32_fh()
it's easy to construct a file handle by retrieving the inode number via
stat and the generation number via FS_IOC_GETVERSION.
Encoding using the inode number and the inode generation number is
probably not very strong so it's not impossible to generate a file
handle that is not unique without knowing in which filesystem to
interpret it in.
The problem is with what name_to_handle_at() returns imho. A mnt_id
doesn't pin the filesystem and the old mnt_id isn't unique. That means
the filesystem can be unmounted and go away and the mnt_id can be
recycled almost immediately for another mount but the file handle is
still there.
So to guarantee that a (weakly encoded) file handle is interpreted in
the right filesystem the file handle must either be accompanied by a
file descriptor that pins the relevant mount or have any other guarantee
that the filesystem doesn't go away (Or of course, the file handle
encodes the uuid of the filesystem or something or uses some sort of
hashing scheme.).
One of the features of file handles is that they're globally usable so
they're interesting to use as handles that can be shared. IOW, one can
send around a file handle to another process without having to pin
anything or have a file descriptor open that needs to be sent via
AF_UNIX.
But as stated above that's potentially risky so one might still have to
send around an fd together with the file handle if sender and receiver
don't share the filesystem for the handle.
However, with the unique mount id things improve quite a bit. Because
now it's possible to send around the unique mount id and the file
handle. Then one can use statmount() to figure out which filesystem this
file handle needs to be interpreted in.
>
> > Granted, that's doable but it'll mean a lot of careful checking to avoid
> > races for mount id recycling because they're not even allocated
> > cyclically. With lots of containers it becomes even more of an issue. So
> > it's doubtful whether exposing the mount id through name_to_handle_at()
> > would be something that we'd still do.
> >
> > So really, if this is just about a use-case where you want to spare the
> > additional system call for statx() and you need the mnt_id then
> > overloading is probably ok.
> >
> > But it remains an unpleasant thing to look at.
> >
>
> Yeah, I agree it's ugly.
>
> --
> Aleksa Sarai
> Senior Software Engineer (Containers)
> SUSE Linux GmbH
> <https://www.cyphar.com/>
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-24 12:25 ` Christian Brauner
@ 2024-05-24 15:58 ` Aleksa Sarai
2024-05-27 0:48 ` Dave Chinner
2024-05-27 0:06 ` Dave Chinner
1 sibling, 1 reply; 21+ messages in thread
From: Aleksa Sarai @ 2024-05-24 15:58 UTC (permalink / raw)
To: Christian Brauner
Cc: Alexander Viro, Jan Kara, Chuck Lever, Jeff Layton,
Amir Goldstein, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel
[-- Attachment #1: Type: text/plain, Size: 6488 bytes --]
On 2024-05-24, Christian Brauner <brauner@kernel.org> wrote:
> On Thu, May 23, 2024 at 09:52:20AM -0600, Aleksa Sarai wrote:
> > On 2024-05-21, Christian Brauner <brauner@kernel.org> wrote:
> > > On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> > > > Now that we have stabilised the unique 64-bit mount ID interface in
> > > > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > > > provide a file handle and corresponding mount without needing to worry
> > > > about racing with /proc/mountinfo parsing.
> > > >
> > > > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > > > that doesn't make sense for name_to_handle_at(2).
> > > >
> > > > Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> > > > ---
> > >
> > > So I think overall this is probably fine (famous last words). If it's
> > > just about being able to retrieve the new mount id without having to
> > > take the hit of another statx system call it's indeed a bit much to
> > > add a revised system call for this. Althoug I did say earlier that I
> > > wouldn't rule that out.
> > >
> > > But if we'd that then it'll be a long discussion on the form of the new
> > > system call and the information it exposes.
> > >
> > > For example, I lack the grey hair needed to understand why
> > > name_to_handle_at() returns a mount id at all. The pitch in commit
> > > 990d6c2d7aee ("vfs: Add name to file handle conversion support") is that
> > > the (old) mount id can be used to "lookup file system specific
> > > information [...] in /proc/<pid>/mountinfo".
> >
> > The logic was presumably to allow you to know what mount the resolved
> > file handle came from. If you use AT_EMPTY_PATH this is not needed
> > because you could just fstatfs (and now statx(AT_EMPTY_PATH)), but if
> > you just give name_to_handle_at() almost any path, there is no race-free
> > way to make sure that you know which filesystem the file handle came
> > from.
> >
> > I don't know if that could lead to security issues (I guess an attacker
> > could find a way to try to manipulate the file handle you get back, and
> > then try to trick you into operating on the wrong filesystem with
> > open_by_handle_at()) but it is definitely something you'd want to avoid.
>
> So the following paragraphs are prefaced with: I'm not an expert on file
> handle encoding and so might be totally wrong.
>
> Afaiu, the uniqueness guarantee of the file handle mostly depends on:
>
> (1) the filesystem
> (2) the actual file handling encoding
>
> Looking at file handle encoding to me it looks like it's fairly easy to
> fake them in userspace (I guess that's ok if you think about them like a
> path but with a weird permission model built around them.) for quite a
> few filesystems.
The old Docker breakout attack did brute-force the fhandle for the root
directory of the host filesystem, so it is entirely possible.
However, the attack I was thinking of was whether a directory tree that
an attacker had mount permissions over could be manipulated such that a
privileged process doing name_to_handle_at() on a path within the tree
would get a file handle that open_by_handle_at() on a different
filesystem would result in a potentially dangerous path being opened.
For instance (M is management process, C is the malicious container
process):
C: Bind-mounts the root of the container filesystem at /foo.
M: name_to_handle_at($CONTAINER/foo)
-> gets an fhandle of / of the container filesystem
-> stores a copy of the (recycled) mount id
C: Swaps /foo with a bind-mount of the host root filesystem such that
the mount id is recycled, before M can scan /proc/self/mountinfo.
C: Triggers M to try to use the filehandle for some administrative
process.
M: open_by_handle_at(...) on the wrong mount id, getting an fd of the
host / directory. Possibly does something bad to this directory
(deleting files, passing the fd back to the container, etc).
It seems possible that this could happen, so having a unique mount id is
kind of important if you plan to use open_by_handle_at() with malicious
actors in control of the target filesystem tree.
Though, regardless of the attack you are worried about, I guess we are
in agreement that a unique mount id from name_to_handle_at() would be a
good idea if we are planning for userspace to use file handles for
everything.
> For example, for anything that uses fs/libfs.c:generic_encode_ino32_fh()
> it's easy to construct a file handle by retrieving the inode number via
> stat and the generation number via FS_IOC_GETVERSION.
>
> Encoding using the inode number and the inode generation number is
> probably not very strong so it's not impossible to generate a file
> handle that is not unique without knowing in which filesystem to
> interpret it in.
>
> The problem is with what name_to_handle_at() returns imho. A mnt_id
> doesn't pin the filesystem and the old mnt_id isn't unique. That means
> the filesystem can be unmounted and go away and the mnt_id can be
> recycled almost immediately for another mount but the file handle is
> still there.
>
> So to guarantee that a (weakly encoded) file handle is interpreted in
> the right filesystem the file handle must either be accompanied by a
> file descriptor that pins the relevant mount or have any other guarantee
> that the filesystem doesn't go away (Or of course, the file handle
> encodes the uuid of the filesystem or something or uses some sort of
> hashing scheme.).
>
> One of the features of file handles is that they're globally usable so
> they're interesting to use as handles that can be shared. IOW, one can
> send around a file handle to another process without having to pin
> anything or have a file descriptor open that needs to be sent via
> AF_UNIX.
>
> But as stated above that's potentially risky so one might still have to
> send around an fd together with the file handle if sender and receiver
> don't share the filesystem for the handle.
>
> However, with the unique mount id things improve quite a bit. Because
> now it's possible to send around the unique mount id and the file
> handle. Then one can use statmount() to figure out which filesystem this
> file handle needs to be interpreted in.
--
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH
<https://www.cyphar.com/>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-24 15:58 ` Aleksa Sarai
@ 2024-05-27 0:48 ` Dave Chinner
0 siblings, 0 replies; 21+ messages in thread
From: Dave Chinner @ 2024-05-27 0:48 UTC (permalink / raw)
To: Aleksa Sarai
Cc: Christian Brauner, Alexander Viro, Jan Kara, Chuck Lever,
Jeff Layton, Amir Goldstein, Alexander Aring, linux-fsdevel,
linux-nfs, linux-kernel
On Fri, May 24, 2024 at 08:58:55AM -0700, Aleksa Sarai wrote:
>
> Though, regardless of the attack you are worried about, I guess we are
> in agreement that a unique mount id from name_to_handle_at() would be a
> good idea if we are planning for userspace to use file handles for
> everything.
I somewhat disagree - the information needed to validate and
restrict the scope of the filehandle needs to be encoded into the
filehandle itself. Otherwise we've done nothing to reduce the
potential abuse scope of the filehandle object itself, nor prevented
users from generating their own filehandles to objects they don't
have direct access to that are still accessible on the given "mount
id" that are outside their direct path based permission scope.
IOWs, the filehandle must encode the restrictions on it's use
internally so that random untrusted third parties cannot use it
outside the context in which is was intended for...
Whether that internal encoding is a mount ID, and mount namespace
identifier or something else completely different is just a detail.
I suspect that the creation of a restricted filehandle should be
done by a simple API flag (e.g. AT_FH_RESTRICTED), and the kernel
decides entirely what goes into the filehandle to restrict it to the
context that the file handle has been created within.
-Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-24 12:25 ` Christian Brauner
2024-05-24 15:58 ` Aleksa Sarai
@ 2024-05-27 0:06 ` Dave Chinner
2024-05-27 13:39 ` Christian Brauner
1 sibling, 1 reply; 21+ messages in thread
From: Dave Chinner @ 2024-05-27 0:06 UTC (permalink / raw)
To: Christian Brauner
Cc: Aleksa Sarai, Alexander Viro, Jan Kara, Chuck Lever, Jeff Layton,
Amir Goldstein, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel
On Fri, May 24, 2024 at 02:25:30PM +0200, Christian Brauner wrote:
> On Thu, May 23, 2024 at 09:52:20AM -0600, Aleksa Sarai wrote:
> > On 2024-05-21, Christian Brauner <brauner@kernel.org> wrote:
> > > On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> > > > Now that we have stabilised the unique 64-bit mount ID interface in
> > > > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > > > provide a file handle and corresponding mount without needing to worry
> > > > about racing with /proc/mountinfo parsing.
> > > >
> > > > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > > > that doesn't make sense for name_to_handle_at(2).
> > > >
> > > > Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> > > > ---
> > >
> > > So I think overall this is probably fine (famous last words). If it's
> > > just about being able to retrieve the new mount id without having to
> > > take the hit of another statx system call it's indeed a bit much to
> > > add a revised system call for this. Althoug I did say earlier that I
> > > wouldn't rule that out.
> > >
> > > But if we'd that then it'll be a long discussion on the form of the new
> > > system call and the information it exposes.
> > >
> > > For example, I lack the grey hair needed to understand why
> > > name_to_handle_at() returns a mount id at all. The pitch in commit
> > > 990d6c2d7aee ("vfs: Add name to file handle conversion support") is that
> > > the (old) mount id can be used to "lookup file system specific
> > > information [...] in /proc/<pid>/mountinfo".
> >
> > The logic was presumably to allow you to know what mount the resolved
> > file handle came from. If you use AT_EMPTY_PATH this is not needed
> > because you could just fstatfs (and now statx(AT_EMPTY_PATH)), but if
> > you just give name_to_handle_at() almost any path, there is no race-free
> > way to make sure that you know which filesystem the file handle came
> > from.
> >
> > I don't know if that could lead to security issues (I guess an attacker
> > could find a way to try to manipulate the file handle you get back, and
> > then try to trick you into operating on the wrong filesystem with
> > open_by_handle_at()) but it is definitely something you'd want to avoid.
>
> So the following paragraphs are prefaced with: I'm not an expert on file
> handle encoding and so might be totally wrong.
>
> Afaiu, the uniqueness guarantee of the file handle mostly depends on:
>
> (1) the filesystem
> (2) the actual file handling encoding
>
> Looking at file handle encoding to me it looks like it's fairly easy to
> fake them in userspace (I guess that's ok if you think about them like a
> path but with a weird permission model built around them.) for quite a
> few filesystems.
This is a feature specifically used by XFS utilities like xfs_fsr
and xfsdump to take pathless inode information retrieved by bulkstat
operations to a file handle to enable arbitrary inodes in the
filesystem to be opened.
i.e. they scan the filesystem by walking the filesystem's internal
inode index, not by walking paths to scan the filesytsem. This is
*much* faster than path walking, especially on seek-limited storage
hardware.
Knowing the inode number, generation and fs uuid, we can
construct a valid filehandle for that inode, and then do whatever
operations we need to do (defrag, backup, move offline (HSM), etc)
without needing to know the path to that inode....
> The problem is with what name_to_handle_at() returns imho. A mnt_id
> doesn't pin the filesystem and the old mnt_id isn't unique. That means
> the filesystem can be unmounted and go away and the mnt_id can be
> recycled almost immediately for another mount but the file handle is
> still there.
This is no different to the NFS server unexporting a filesystem -
all attempts to resolve the file handle the client holds for that
export must now fail. Hence the filehandle itself must have a
superblock specific identifier in it.
> So to guarantee that a (weakly encoded) file handle is interpreted in
> the right filesystem the file handle must either be accompanied by a
> file descriptor that pins the relevant mount or have any other guarantee
> that the filesystem doesn't go away (Or of course, the file handle
> encodes the uuid of the filesystem or something or uses some sort of
> hashing scheme.).
Yes, it does, and that's the superblock based identifier, not
something that is mount based. i.e. the handle is valid regardless
of where the filesystem is mounted, even if the application does not
have visibility or permitted access to the given mount. And the
filehandle is persistent - it must work across unmount/mount,
reboots, change of mount location, etc.
That means that if you are using file handles, any filesystem that
is shared across multiple containers can by fully accessed by user
generated file handles from any container if no special permissions
are required. i.e. there are no real path or mount based security
boundaries for filehandles in existence righ now.
If filehandles are going to be restricted to a specific container
(e.g. a single mount) so that less permissions are needed to open
the filehandle, then the filehandle itself needs to encode those
restrictions. Decoding the filehandle needs to ensure that the
opening context has permission to access whatever the filehandle
points to in a restricted environment. This will prevent existing
persistent, global filehandles from being used as restricted
filehandles and vice versa.
Restricting filehandles for use as persistent filehandles is going
to be tricky - mount IDs are ephermeral and only valid while a mount
is active. I'd suggest that restricted filehandles should only be
ephemeral, too. That would also allow restricted filehandles to use
kernel side crypto based encoding so nobody can ever construct them
from userspace.
Hence I think we have to look at what we are encoding in the
filehandle itself to solve the "shared access from restricted
environments" problem, not try to add stuff around the outside of
the filehandle API to paper over the fact that filehandles
themselves have no permission/restriction model built into them.
This would also avoid the whole problem of "users can access any
file in the underlying filesystem by constructing their own handles" problem that
relaxed permissions on filehandle decoding creates, hence opening
the door for more extensive use of filehandles in untrusted
environments.
> One of the features of file handles is that they're globally usable so
> they're interesting to use as handles that can be shared. IOW, one can
> send around a file handle to another process without having to pin
> anything or have a file descriptor open that needs to be sent via
> AF_UNIX.
>
> But as stated above that's potentially risky so one might still have to
> send around an fd together with the file handle if sender and receiver
> don't share the filesystem for the handle.
Adding a trust context around the outside of an untrusted handle
isn't the right way to address this problem - define a filehandle
format that can be trusted and constrained within specific security
domains and the need for external permission contexts (whatever they
look like) to be passed with the filehandle to make it valid go away
entirely.
-Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-27 0:06 ` Dave Chinner
@ 2024-05-27 13:39 ` Christian Brauner
0 siblings, 0 replies; 21+ messages in thread
From: Christian Brauner @ 2024-05-27 13:39 UTC (permalink / raw)
To: Dave Chinner
Cc: Aleksa Sarai, Alexander Viro, Jan Kara, Chuck Lever, Jeff Layton,
Amir Goldstein, Alexander Aring, linux-fsdevel, linux-nfs,
linux-kernel
On Mon, May 27, 2024 at 10:06:15AM +1000, Dave Chinner wrote:
> On Fri, May 24, 2024 at 02:25:30PM +0200, Christian Brauner wrote:
> > On Thu, May 23, 2024 at 09:52:20AM -0600, Aleksa Sarai wrote:
> > > On 2024-05-21, Christian Brauner <brauner@kernel.org> wrote:
> > > > On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> > > > > Now that we have stabilised the unique 64-bit mount ID interface in
> > > > > statx, we can now provide a race-free way for name_to_handle_at(2) to
> > > > > provide a file handle and corresponding mount without needing to worry
> > > > > about racing with /proc/mountinfo parsing.
> > > > >
> > > > > As with AT_HANDLE_FID, AT_HANDLE_UNIQUE_MNT_ID reuses a statx AT_* bit
> > > > > that doesn't make sense for name_to_handle_at(2).
> > > > >
> > > > > Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
> > > > > ---
> > > >
> > > > So I think overall this is probably fine (famous last words). If it's
> > > > just about being able to retrieve the new mount id without having to
> > > > take the hit of another statx system call it's indeed a bit much to
> > > > add a revised system call for this. Althoug I did say earlier that I
> > > > wouldn't rule that out.
> > > >
> > > > But if we'd that then it'll be a long discussion on the form of the new
> > > > system call and the information it exposes.
> > > >
> > > > For example, I lack the grey hair needed to understand why
> > > > name_to_handle_at() returns a mount id at all. The pitch in commit
> > > > 990d6c2d7aee ("vfs: Add name to file handle conversion support") is that
> > > > the (old) mount id can be used to "lookup file system specific
> > > > information [...] in /proc/<pid>/mountinfo".
> > >
> > > The logic was presumably to allow you to know what mount the resolved
> > > file handle came from. If you use AT_EMPTY_PATH this is not needed
> > > because you could just fstatfs (and now statx(AT_EMPTY_PATH)), but if
> > > you just give name_to_handle_at() almost any path, there is no race-free
> > > way to make sure that you know which filesystem the file handle came
> > > from.
> > >
> > > I don't know if that could lead to security issues (I guess an attacker
> > > could find a way to try to manipulate the file handle you get back, and
> > > then try to trick you into operating on the wrong filesystem with
> > > open_by_handle_at()) but it is definitely something you'd want to avoid.
> >
> > So the following paragraphs are prefaced with: I'm not an expert on file
> > handle encoding and so might be totally wrong.
> >
> > Afaiu, the uniqueness guarantee of the file handle mostly depends on:
> >
> > (1) the filesystem
> > (2) the actual file handling encoding
> >
> > Looking at file handle encoding to me it looks like it's fairly easy to
> > fake them in userspace (I guess that's ok if you think about them like a
> > path but with a weird permission model built around them.) for quite a
> > few filesystems.
>
> This is a feature specifically used by XFS utilities like xfs_fsr
> and xfsdump to take pathless inode information retrieved by bulkstat
> operations to a file handle to enable arbitrary inodes in the
> filesystem to be opened.
>
> i.e. they scan the filesystem by walking the filesystem's internal
> inode index, not by walking paths to scan the filesytsem. This is
> *much* faster than path walking, especially on seek-limited storage
> hardware.
>
> Knowing the inode number, generation and fs uuid, we can
> construct a valid filehandle for that inode, and then do whatever
> operations we need to do (defrag, backup, move offline (HSM), etc)
> without needing to know the path to that inode....
Yeah, I think you mentioned this in another context before.
> > The problem is with what name_to_handle_at() returns imho. A mnt_id
> > doesn't pin the filesystem and the old mnt_id isn't unique. That means
> > the filesystem can be unmounted and go away and the mnt_id can be
> > recycled almost immediately for another mount but the file handle is
> > still there.
>
> This is no different to the NFS server unexporting a filesystem -
> all attempts to resolve the file handle the client holds for that
> export must now fail. Hence the filehandle itself must have a
> superblock specific identifier in it.
>
> > So to guarantee that a (weakly encoded) file handle is interpreted in
> > the right filesystem the file handle must either be accompanied by a
> > file descriptor that pins the relevant mount or have any other guarantee
> > that the filesystem doesn't go away (Or of course, the file handle
> > encodes the uuid of the filesystem or something or uses some sort of
> > hashing scheme.).
>
> Yes, it does, and that's the superblock based identifier, not
> something that is mount based. i.e. the handle is valid regardless
> of where the filesystem is mounted, even if the application does not
> have visibility or permitted access to the given mount. And the
> filehandle is persistent - it must work across unmount/mount,
> reboots, change of mount location, etc.
Agreed, and no one is disputing that. The old mount id as exposed by
name_to_handle_at() is one means to get to a persisent identifier and
that is racy. But we do have a 64bit non-recyled mount id and
statmount() since v6.7 now which allow to get a persistent identifier
for the filesystem in a race-free manner.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-20 21:35 [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2) Aleksa Sarai
` (3 preceding siblings ...)
2024-05-21 13:45 ` Christian Brauner
@ 2024-05-23 14:47 ` Dan Carpenter
2024-05-26 8:55 ` Christoph Hellwig
5 siblings, 0 replies; 21+ messages in thread
From: Dan Carpenter @ 2024-05-23 14:47 UTC (permalink / raw)
To: oe-kbuild, Aleksa Sarai; +Cc: lkp, oe-kbuild-all
Hi Aleksa,
[This is a private test report for your RFC patch.]
kernel test robot noticed the following build warnings:
url: https://github.com/intel-lab-lkp/linux/commits/Aleksa-Sarai/fhandle-expose-u64-mount-id-to-name_to_handle_at-2/20240521-053815
base: 584bbf439d0fa83d728ec49f3a38c581bdc828b4
patch link: https://lore.kernel.org/r/20240520-exportfs-u64-mount-id-v1-1-f55fd9215b8e%40cyphar.com
patch subject: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
config: x86_64-randconfig-161-20240521 (https://download.01.org/0day-ci/archive/20240521/202405211900.ZcYOJf1E-lkp@intel.com/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202405211900.ZcYOJf1E-lkp@intel.com/
smatch warnings:
fs/fhandle.c:84 do_sys_name_to_handle() warn: maybe return -EFAULT instead of the bytes remaining?
vim +84 fs/fhandle.c
6ccaaf59c335a8 Al Viro 2022-08-04 17 static long do_sys_name_to_handle(const struct path *path,
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 18 struct file_handle __user *ufh,
1702d25fd14170 Aleksa Sarai 2024-05-20 19 void __user *mnt_id, bool unique_mntid,
1702d25fd14170 Aleksa Sarai 2024-05-20 20 int fh_flags)
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 21 {
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 22 long retval;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 23 struct file_handle f_handle;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 24 int handle_dwords, handle_bytes;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 25 struct file_handle *handle = NULL;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 26
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 27 /*
96b2b072ee62be Amir Goldstein 2023-05-02 28 * We need to make sure whether the file system support decoding of
96b2b072ee62be Amir Goldstein 2023-05-02 29 * the file handle if decodeable file handle was requested.
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 30 */
66c62769bcf6aa Amir Goldstein 2023-10-23 31 if (!exportfs_can_encode_fh(path->dentry->d_sb->s_export_op, fh_flags))
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 32 return -EOPNOTSUPP;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 33
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 34 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 35 return -EFAULT;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 36
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 37 if (f_handle.handle_bytes > MAX_HANDLE_SZ)
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 38 return -EINVAL;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 39
68d6f4f3fbd9b1 Gustavo A. R. Silva 2024-03-25 40 handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 41 GFP_KERNEL);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 42 if (!handle)
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 43 return -ENOMEM;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 44
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 45 /* convert handle size to multiple of sizeof(u32) */
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 46 handle_dwords = f_handle.handle_bytes >> 2;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 47
96b2b072ee62be Amir Goldstein 2023-05-02 48 /* we ask for a non connectable maybe decodeable file handle */
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 49 retval = exportfs_encode_fh(path->dentry,
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 50 (struct fid *)handle->f_handle,
96b2b072ee62be Amir Goldstein 2023-05-02 51 &handle_dwords, fh_flags);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 52 handle->handle_type = retval;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 53 /* convert handle size to bytes */
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 54 handle_bytes = handle_dwords * sizeof(u32);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 55 handle->handle_bytes = handle_bytes;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 56 if ((handle->handle_bytes > f_handle.handle_bytes) ||
7cdafe6cc4a6ee Amir Goldstein 2023-05-24 57 (retval == FILEID_INVALID) || (retval < 0)) {
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 58 /* As per old exportfs_encode_fh documentation
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 59 * we could return ENOSPC to indicate overflow
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 60 * But file system returned 255 always. So handle
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 61 * both the values
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 62 */
7cdafe6cc4a6ee Amir Goldstein 2023-05-24 63 if (retval == FILEID_INVALID || retval == -ENOSPC)
7cdafe6cc4a6ee Amir Goldstein 2023-05-24 64 retval = -EOVERFLOW;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 65 /*
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 66 * set the handle size to zero so we copy only
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 67 * non variable part of the file_handle
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 68 */
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 69 handle_bytes = 0;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 70 } else
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 71 retval = 0;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 72 /* copy the mount id */
1702d25fd14170 Aleksa Sarai 2024-05-20 73 if (unique_mntid)
1702d25fd14170 Aleksa Sarai 2024-05-20 74 retval = put_user(real_mount(path->mnt)->mnt_id_unique,
1702d25fd14170 Aleksa Sarai 2024-05-20 75 (u64 __user *) mnt_id);
1702d25fd14170 Aleksa Sarai 2024-05-20 76 else
1702d25fd14170 Aleksa Sarai 2024-05-20 77 retval = put_user(real_mount(path->mnt)->mnt_id,
1702d25fd14170 Aleksa Sarai 2024-05-20 78 (int __user *) mnt_id);
1702d25fd14170 Aleksa Sarai 2024-05-20 79 /* copy the handle */
1702d25fd14170 Aleksa Sarai 2024-05-20 80 if (!retval)
1702d25fd14170 Aleksa Sarai 2024-05-20 81 retval = copy_to_user(ufh, handle,
1702d25fd14170 Aleksa Sarai 2024-05-20 82 struct_size(handle, f_handle, handle_bytes));
copy_to_user() doesn't return error codes, it returns the number of
bytes that were NOT copied.
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 83 kfree(handle);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 @84 return retval;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 85 }
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
2024-05-20 21:35 [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2) Aleksa Sarai
` (4 preceding siblings ...)
2024-05-23 14:47 ` Dan Carpenter
@ 2024-05-26 8:55 ` Christoph Hellwig
5 siblings, 0 replies; 21+ messages in thread
From: Christoph Hellwig @ 2024-05-26 8:55 UTC (permalink / raw)
To: Aleksa Sarai
Cc: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
Jeff Layton, Amir Goldstein, Alexander Aring, linux-fsdevel,
linux-nfs, linux-kernel
On Mon, May 20, 2024 at 05:35:49PM -0400, Aleksa Sarai wrote:
> Now that we have stabilised the unique 64-bit mount ID interface in
> statx, we can now provide a race-free way for name_to_handle_at(2) to
> provide a file handle and corresponding mount without needing to worry
> about racing with /proc/mountinfo parsing.
What are the guarantees for the mount ID? Is it stable across reboots?
If not mixing it with file handles is a very bad idea.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
@ 2024-05-21 12:16 kernel test robot
0 siblings, 0 replies; 21+ messages in thread
From: kernel test robot @ 2024-05-21 12:16 UTC (permalink / raw)
To: oe-kbuild; +Cc: lkp, Dan Carpenter
BCC: lkp@intel.com
CC: oe-kbuild-all@lists.linux.dev
In-Reply-To: <20240520-exportfs-u64-mount-id-v1-1-f55fd9215b8e@cyphar.com>
References: <20240520-exportfs-u64-mount-id-v1-1-f55fd9215b8e@cyphar.com>
TO: Aleksa Sarai <cyphar@cyphar.com>
Hi Aleksa,
[This is a private test report for your RFC patch.]
kernel test robot noticed the following build warnings:
[auto build test WARNING on 584bbf439d0fa83d728ec49f3a38c581bdc828b4]
url: https://github.com/intel-lab-lkp/linux/commits/Aleksa-Sarai/fhandle-expose-u64-mount-id-to-name_to_handle_at-2/20240521-053815
base: 584bbf439d0fa83d728ec49f3a38c581bdc828b4
patch link: https://lore.kernel.org/r/20240520-exportfs-u64-mount-id-v1-1-f55fd9215b8e%40cyphar.com
patch subject: [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2)
:::::: branch date: 14 hours ago
:::::: commit date: 14 hours ago
config: x86_64-randconfig-161-20240521 (https://download.01.org/0day-ci/archive/20240521/202405211900.ZcYOJf1E-lkp@intel.com/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <error27@gmail.com>
| Closes: https://lore.kernel.org/r/202405211900.ZcYOJf1E-lkp@intel.com/
smatch warnings:
fs/fhandle.c:84 do_sys_name_to_handle() warn: maybe return -EFAULT instead of the bytes remaining?
vim +84 fs/fhandle.c
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 16
6ccaaf59c335a8 Al Viro 2022-08-04 17 static long do_sys_name_to_handle(const struct path *path,
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 18 struct file_handle __user *ufh,
1702d25fd14170 Aleksa Sarai 2024-05-20 19 void __user *mnt_id, bool unique_mntid,
1702d25fd14170 Aleksa Sarai 2024-05-20 20 int fh_flags)
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 21 {
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 22 long retval;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 23 struct file_handle f_handle;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 24 int handle_dwords, handle_bytes;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 25 struct file_handle *handle = NULL;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 26
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 27 /*
96b2b072ee62be Amir Goldstein 2023-05-02 28 * We need to make sure whether the file system support decoding of
96b2b072ee62be Amir Goldstein 2023-05-02 29 * the file handle if decodeable file handle was requested.
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 30 */
66c62769bcf6aa Amir Goldstein 2023-10-23 31 if (!exportfs_can_encode_fh(path->dentry->d_sb->s_export_op, fh_flags))
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 32 return -EOPNOTSUPP;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 33
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 34 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 35 return -EFAULT;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 36
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 37 if (f_handle.handle_bytes > MAX_HANDLE_SZ)
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 38 return -EINVAL;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 39
68d6f4f3fbd9b1 Gustavo A. R. Silva 2024-03-25 40 handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 41 GFP_KERNEL);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 42 if (!handle)
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 43 return -ENOMEM;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 44
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 45 /* convert handle size to multiple of sizeof(u32) */
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 46 handle_dwords = f_handle.handle_bytes >> 2;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 47
96b2b072ee62be Amir Goldstein 2023-05-02 48 /* we ask for a non connectable maybe decodeable file handle */
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 49 retval = exportfs_encode_fh(path->dentry,
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 50 (struct fid *)handle->f_handle,
96b2b072ee62be Amir Goldstein 2023-05-02 51 &handle_dwords, fh_flags);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 52 handle->handle_type = retval;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 53 /* convert handle size to bytes */
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 54 handle_bytes = handle_dwords * sizeof(u32);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 55 handle->handle_bytes = handle_bytes;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 56 if ((handle->handle_bytes > f_handle.handle_bytes) ||
7cdafe6cc4a6ee Amir Goldstein 2023-05-24 57 (retval == FILEID_INVALID) || (retval < 0)) {
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 58 /* As per old exportfs_encode_fh documentation
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 59 * we could return ENOSPC to indicate overflow
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 60 * But file system returned 255 always. So handle
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 61 * both the values
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 62 */
7cdafe6cc4a6ee Amir Goldstein 2023-05-24 63 if (retval == FILEID_INVALID || retval == -ENOSPC)
7cdafe6cc4a6ee Amir Goldstein 2023-05-24 64 retval = -EOVERFLOW;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 65 /*
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 66 * set the handle size to zero so we copy only
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 67 * non variable part of the file_handle
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 68 */
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 69 handle_bytes = 0;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 70 } else
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 71 retval = 0;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 72 /* copy the mount id */
1702d25fd14170 Aleksa Sarai 2024-05-20 73 if (unique_mntid)
1702d25fd14170 Aleksa Sarai 2024-05-20 74 retval = put_user(real_mount(path->mnt)->mnt_id_unique,
1702d25fd14170 Aleksa Sarai 2024-05-20 75 (u64 __user *) mnt_id);
1702d25fd14170 Aleksa Sarai 2024-05-20 76 else
1702d25fd14170 Aleksa Sarai 2024-05-20 77 retval = put_user(real_mount(path->mnt)->mnt_id,
1702d25fd14170 Aleksa Sarai 2024-05-20 78 (int __user *) mnt_id);
1702d25fd14170 Aleksa Sarai 2024-05-20 79 /* copy the handle */
1702d25fd14170 Aleksa Sarai 2024-05-20 80 if (!retval)
1702d25fd14170 Aleksa Sarai 2024-05-20 81 retval = copy_to_user(ufh, handle,
1702d25fd14170 Aleksa Sarai 2024-05-20 82 struct_size(handle, f_handle, handle_bytes));
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 83 kfree(handle);
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 @84 return retval;
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 85 }
990d6c2d7aee92 Aneesh Kumar K.V 2011-01-29 86
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2024-05-27 13:40 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-05-20 21:35 [PATCH RFC] fhandle: expose u64 mount id to name_to_handle_at(2) Aleksa Sarai
2024-05-20 21:53 ` Jeff Layton
2024-05-20 22:27 ` Aleksa Sarai
2024-05-21 5:04 ` Amir Goldstein
2024-05-21 10:42 ` Aleksa Sarai
2024-05-21 2:18 ` kernel test robot
2024-05-21 3:02 ` kernel test robot
2024-05-21 13:45 ` Christian Brauner
2024-05-21 14:11 ` Christian Brauner
2024-05-21 14:27 ` Jeff Layton
2024-05-21 16:42 ` Amir Goldstein
2024-05-23 19:16 ` Aleksa Sarai
2024-05-23 15:52 ` Aleksa Sarai
2024-05-24 12:25 ` Christian Brauner
2024-05-24 15:58 ` Aleksa Sarai
2024-05-27 0:48 ` Dave Chinner
2024-05-27 0:06 ` Dave Chinner
2024-05-27 13:39 ` Christian Brauner
2024-05-23 14:47 ` Dan Carpenter
2024-05-26 8:55 ` Christoph Hellwig
-- strict thread matches above, loose matches on Subject: below --
2024-05-21 12:16 kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.