All of lore.kernel.org
 help / color / mirror / Atom feed
From: viro@kernel.org
To: linux-fsdevel@vger.kernel.org
Cc: amir73il@gmail.com, bpf@vger.kernel.org, brauner@kernel.org,
	cgroups@vger.kernel.org, kvm@vger.kernel.org,
	netdev@vger.kernel.org, torvalds@linux-foundation.org
Subject: [PATCH 03/39] struct fd: representation change
Date: Tue, 30 Jul 2024 01:15:49 -0400	[thread overview]
Message-ID: <20240730051625.14349-3-viro@kernel.org> (raw)
In-Reply-To: <20240730051625.14349-1-viro@kernel.org>

From: Al Viro <viro@zeniv.linux.org.uk>

	The absolute majority of instances comes from fdget() and its
relatives; the underlying primitives actually return a struct file
reference and a couple of flags encoded into an unsigned long - the lower
two bits of file address are always zero, so we can stash the flags
into those.  On the way out we use __to_fd() to unpack that unsigned
long into struct fd.

	Let's use that representation for struct fd itself - make it
a structure with a single unsigned long member (.word), with the value
equal either to (unsigned long)p | flags, p being an address of some
struct file instance, or to 0 for an empty fd.

	Note that we never used a struct fd instance with NULL ->file
and non-zero ->flags; the emptiness had been checked as (!f.file) and
we expected e.g. fdput(empty) to be a no-op.  With new representation
we can use (!f.word) for emptiness check; that is enough for compiler
to figure out that (f.word & FDPUT_FPUT) will be false and that fdput(f)
will be a no-op in such case.

	For now the new predicate (fd_empty(f)) has no users; all the
existing checks have form (!fd_file(f)).  We will convert to fd_empty()
use later; here we only define it (and tell the compiler that it's
unlikely to return true).

	This commit only deals with representation change; there will
be followups.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/infiniband/core/uverbs_cmd.c |  2 +-
 fs/overlayfs/file.c                  | 28 +++++++++++++++-------------
 fs/xfs/xfs_handle.c                  |  2 +-
 include/linux/file.h                 | 22 ++++++++++++++++------
 kernel/events/core.c                 |  2 +-
 net/socket.c                         |  2 +-
 6 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3f85575cf971..a4cce360df21 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -572,7 +572,7 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
 	struct inode                   *inode = NULL;
 	int				new_xrcd = 0;
 	struct ib_device *ib_dev;
-	struct fd f = {};
+	struct fd f = EMPTY_FD;
 	int ret;
 
 	ret = uverbs_request(attrs, &cmd, sizeof(cmd));
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index c4963d0c5549..2b7a5a3a7a2f 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -93,11 +93,11 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 			       bool allow_meta)
 {
 	struct dentry *dentry = file_dentry(file);
+	struct file *realfile = file->private_data;
 	struct path realpath;
 	int err;
 
-	real->flags = 0;
-	real->file = file->private_data;
+	real->word = (unsigned long)realfile;
 
 	if (allow_meta) {
 		ovl_path_real(dentry, &realpath);
@@ -113,16 +113,17 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 		return -EIO;
 
 	/* Has it been copied up since we'd opened it? */
-	if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
-		real->flags = FDPUT_FPUT;
-		real->file = ovl_open_realfile(file, &realpath);
-
-		return PTR_ERR_OR_ZERO(real->file);
+	if (unlikely(file_inode(realfile) != d_inode(realpath.dentry))) {
+		struct file *f = ovl_open_realfile(file, &realpath);
+		if (IS_ERR(f))
+			return PTR_ERR(f);
+		real->word = (unsigned long)ovl_open_realfile(file, &realpath) | FDPUT_FPUT;
+		return 0;
 	}
 
 	/* Did the flags change since open? */
-	if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
-		return ovl_change_flags(real->file, file->f_flags);
+	if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS))
+		return ovl_change_flags(realfile, file->f_flags);
 
 	return 0;
 }
@@ -130,10 +131,11 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 static int ovl_real_fdget(const struct file *file, struct fd *real)
 {
 	if (d_is_dir(file_dentry(file))) {
-		real->flags = 0;
-		real->file = ovl_dir_real_file(file, false);
-
-		return PTR_ERR_OR_ZERO(real->file);
+		struct file *f = ovl_dir_real_file(file, false);
+		if (IS_ERR(f))
+			return PTR_ERR(f);
+		real->word = (unsigned long)f;
+		return 0;
 	}
 
 	return ovl_real_fdget_meta(file, real, false);
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c
index 7bcc4f519cb8..49e5e5f04e60 100644
--- a/fs/xfs/xfs_handle.c
+++ b/fs/xfs/xfs_handle.c
@@ -85,7 +85,7 @@ xfs_find_handle(
 	int			hsize;
 	xfs_handle_t		handle;
 	struct inode		*inode;
-	struct fd		f = {NULL};
+	struct fd		f = EMPTY_FD;
 	struct path		path;
 	int			error;
 	struct xfs_inode	*ip;
diff --git a/include/linux/file.h b/include/linux/file.h
index 0f3f369f2450..bdd6e1766839 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -35,18 +35,28 @@ static inline void fput_light(struct file *file, int fput_needed)
 		fput(file);
 }
 
+/* either a reference to struct file + flags
+ * (cloned vs. borrowed, pos locked), with
+ * flags stored in lower bits of value,
+ * or empty (represented by 0).
+ */
 struct fd {
-	struct file *file;
-	unsigned int flags;
+	unsigned long word;
 };
 #define FDPUT_FPUT       1
 #define FDPUT_POS_UNLOCK 2
 
-#define fd_file(f) ((f).file)
+#define fd_file(f) ((struct file *)((f).word & ~3))
+static inline bool fd_empty(struct fd f)
+{
+	return unlikely(!f.word);
+}
+
+#define EMPTY_FD (struct fd){0}
 
 static inline void fdput(struct fd fd)
 {
-	if (fd.flags & FDPUT_FPUT)
+	if (fd.word & FDPUT_FPUT)
 		fput(fd_file(fd));
 }
 
@@ -60,7 +70,7 @@ extern void __f_unlock_pos(struct file *);
 
 static inline struct fd __to_fd(unsigned long v)
 {
-	return (struct fd){(struct file *)(v & ~3),v & 3};
+	return (struct fd){v};
 }
 
 static inline struct fd fdget(unsigned int fd)
@@ -80,7 +90,7 @@ static inline struct fd fdget_pos(int fd)
 
 static inline void fdput_pos(struct fd f)
 {
-	if (f.flags & FDPUT_POS_UNLOCK)
+	if (f.word & FDPUT_POS_UNLOCK)
 		__f_unlock_pos(fd_file(f));
 	fdput(f);
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 17b19d3e74ba..fd2ac9c7fd77 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -12474,7 +12474,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	struct perf_event_attr attr;
 	struct perf_event_context *ctx;
 	struct file *event_file = NULL;
-	struct fd group = {NULL, 0};
+	struct fd group = EMPTY_FD;
 	struct task_struct *task = NULL;
 	struct pmu *pmu;
 	int event_fd;
diff --git a/net/socket.c b/net/socket.c
index f77a42a74510..c0d4f5032374 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -559,7 +559,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 	if (fd_file(f)) {
 		sock = sock_from_file(fd_file(f));
 		if (likely(sock)) {
-			*fput_needed = f.flags & FDPUT_FPUT;
+			*fput_needed = f.word & FDPUT_FPUT;
 			return sock;
 		}
 		*err = -ENOTSOCK;
-- 
2.39.2


  parent reply	other threads:[~2024-07-30  5:14 UTC|newest]

Thread overview: 134+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-30  5:09 [PATCHSET][RFC] struct fd and memory safety Al Viro
2024-07-30  5:15 ` [PATCH 01/39] memcg_write_event_control(): fix a user-triggerable oops viro
2024-07-30  5:15   ` [PATCH 02/39] introduce fd_file(), convert all accessors to it viro
2024-08-07  9:55     ` Christian Brauner
2024-07-30  5:15   ` viro [this message]
2024-07-30 18:10     ` [PATCH 03/39] struct fd: representation change Josef Bacik
2024-08-07 10:07       ` Christian Brauner
2024-08-07 10:03     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 04/39] add struct fd constructors, get rid of __to_fd() viro
2024-08-07 10:09     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 05/39] regularize emptiness checks in fini_module(2) and vfs_dedupe_file_range() viro
2024-08-07 10:10     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 06/39] net/socket.c: switch to CLASS(fd) viro
2024-08-07 10:13     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 07/39] introduce struct fderr, convert overlayfs uses to that viro
2024-07-30  5:15   ` [PATCH 08/39] experimental: convert fs/overlayfs/file.c to CLASS(...) viro
2024-07-30 19:10     ` Josef Bacik
2024-07-30 21:12       ` Al Viro
2024-07-31 21:11         ` Josef Bacik
2024-08-07 10:23     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 09/39] timerfd: switch to CLASS(fd, ...) viro
2024-08-07 10:24     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 10/39] get rid of perf_fget_light(), convert kernel/events/core.c to CLASS(fd) viro
2024-08-07 10:25     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 11/39] switch netlink_getsockbyfilp() to taking descriptor viro
2024-08-07 10:26     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 12/39] do_mq_notify(): saner skb freeing on failures viro
2024-07-30  5:15   ` [PATCH 13/39] do_mq_notify(): switch to CLASS(fd, ...) viro
2024-08-07 10:27     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 14/39] simplify xfs_find_handle() a bit viro
2024-07-30  5:16   ` [PATCH 15/39] convert vmsplice() to CLASS(fd, ...) viro
2024-08-07 10:27     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 16/39] convert __bpf_prog_get() " viro
2024-08-06 21:08     ` Andrii Nakryiko
2024-08-07 10:28     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 17/39] bpf: resolve_pseudo_ldimm64(): take handling of a single ldimm64 insn into helper viro
2024-08-06 22:32     ` Andrii Nakryiko
2024-08-07 10:29       ` Christian Brauner
2024-08-07 15:30         ` Andrii Nakryiko
2024-08-08 16:51           ` Alexei Starovoitov
2024-08-08 20:35             ` Andrii Nakryiko
2024-08-09  1:23               ` Alexei Starovoitov
2024-08-09 17:23                 ` Andrii Nakryiko
2024-08-10  3:29             ` Al Viro
2024-08-12 20:05               ` Andrii Nakryiko
2024-08-13  2:06                 ` Al Viro
2024-08-13  3:32                   ` Andrii Nakryiko
2024-07-30  5:16   ` [PATCH 18/39] bpf maps: switch to CLASS(fd, ...) viro
2024-08-07 10:34     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 19/39] fdget_raw() users: switch to CLASS(fd_raw, ...) viro
2024-08-07 10:35     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 20/39] introduce "fd_pos" class, convert fdget_pos() users to it viro
2024-08-07 10:36     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 21/39] o2hb_region_dev_store(): avoid goto around fdget()/fdput() viro
2024-07-30  5:16   ` [PATCH 22/39] privcmd_ioeventfd_assign(): don't open-code eventfd_ctx_fdget() viro
2024-07-30  5:16   ` [PATCH 23/39] fdget(), trivial conversions viro
2024-08-07 10:37     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 24/39] fdget(), more " viro
2024-08-07 10:39     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 25/39] convert do_preadv()/do_pwritev() viro
2024-08-07 10:39     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 26/39] convert cachestat(2) viro
2024-08-07 10:39     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 27/39] switch spufs_calls_{get,put}() to CLASS() use viro
2024-07-30  5:16   ` [PATCH 28/39] convert spu_run(2) viro
2024-08-07 10:40     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 29/39] convert media_request_get_by_fd() viro
2024-08-07 10:40     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 30/39] convert coda_parse_fd() viro
2024-08-07 10:41     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 31/39] convert cifs_ioctl_copychunk() viro
2024-08-07 10:41     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 32/39] convert vfs_dedupe_file_range() viro
2024-08-07 10:42     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 33/39] convert do_select() viro
2024-08-07 10:42     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 34/39] do_pollfd(): convert to CLASS(fd) viro
2024-08-07 10:43     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 35/39] convert bpf_token_create() viro
2024-08-06 22:42     ` Andrii Nakryiko
2024-08-10  3:46       ` Al Viro
2024-08-12 20:06         ` Andrii Nakryiko
2024-08-07 10:44     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 36/39] assorted variants of irqfd setup: convert to CLASS(fd) viro
2024-08-07 10:46     ` Christian Brauner
2024-08-10  3:53       ` Al Viro
2024-07-30  5:16   ` [PATCH 37/39] memcg_write_event_control(): switch " viro
2024-08-07 10:47     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 38/39] css_set_fork(): switch to CLASS(fd_raw, ...) viro
2024-08-07 10:47     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 39/39] deal with the last remaing boolean uses of fd_file() viro
2024-08-07 10:48     ` Christian Brauner
2024-07-30  7:13   ` [PATCH 01/39] memcg_write_event_control(): fix a user-triggerable oops Michal Hocko
2024-07-30  7:18     ` Al Viro
2024-07-30  7:37       ` Michal Hocko
2024-07-30  5:17 ` [PATCHSET][RFC] struct fd and memory safety Al Viro
2024-07-30 20:02 ` Josef Bacik
2024-07-31  0:43 ` Al Viro
2024-08-06 17:58 ` Jason Gunthorpe
2024-08-06 18:56   ` Al Viro
2024-08-07 10:51 ` Christian Brauner
2024-11-02  5:02 ` [PATCHSET v3] " Al Viro
2024-11-02  5:07   ` [PATCH v3 01/28] net/socket.c: switch to CLASS(fd) Al Viro
2024-11-02  5:08     ` [PATCH v3 02/28] regularize emptiness checks in fini_module(2) and vfs_dedupe_file_range() Al Viro
2024-11-02  5:08     ` [PATCH v3 03/28] timerfd: switch to CLASS(fd) Al Viro
2024-11-02  5:08     ` [PATCH v3 04/28] get rid of perf_fget_light(), convert kernel/events/core.c " Al Viro
2024-11-02  5:08     ` [PATCH v3 05/28] switch netlink_getsockbyfilp() to taking descriptor Al Viro
2024-11-02  5:08     ` [PATCH v3 06/28] do_mq_notify(): saner skb freeing on failures Al Viro
2024-11-02  5:08     ` [PATCH v3 07/28] do_mq_notify(): switch to CLASS(fd) Al Viro
2024-11-02  5:08     ` [PATCH v3 08/28] simplify xfs_find_handle() a bit Al Viro
2024-11-02  5:08     ` [PATCH v3 09/28] convert vmsplice() to CLASS(fd) Al Viro
2024-11-02  5:08     ` [PATCH v3 10/28] fdget_raw() users: switch to CLASS(fd_raw) Al Viro
2024-11-02  5:08     ` [PATCH v3 11/28] introduce "fd_pos" class, convert fdget_pos() users to it Al Viro
2024-11-02  5:08     ` [PATCH v3 12/28] o2hb_region_dev_store(): avoid goto around fdget()/fdput() Al Viro
2024-11-02  5:08     ` [PATCH v3 13/28] privcmd_ioeventfd_assign(): don't open-code eventfd_ctx_fdget() Al Viro
2024-11-02  5:08     ` [PATCH v3 14/28] fdget(), trivial conversions Al Viro
2024-11-11 17:22       ` Francesco Lavra
2024-11-02  5:08     ` [PATCH v3 15/28] fdget(), more " Al Viro
2024-11-02  5:08     ` [PATCH v3 16/28] convert do_preadv()/do_pwritev() Al Viro
2024-11-02  5:08     ` [PATCH v3 17/28] convert cachestat(2) Al Viro
2024-11-02  5:08     ` [PATCH v3 18/28] switch spufs_calls_{get,put}() to CLASS() use Al Viro
2024-11-02  5:08     ` [PATCH v3 19/28] convert spu_run(2) Al Viro
2024-11-02  5:08     ` [PATCH v3 20/28] convert media_request_get_by_fd() Al Viro
2024-11-02  5:08     ` [PATCH v3 21/28] convert cifs_ioctl_copychunk() Al Viro
2024-11-02  5:08     ` [PATCH v3 22/28] convert vfs_dedupe_file_range() Al Viro
2024-11-02  5:08     ` [PATCH v3 23/28] convert do_select() Al Viro
2024-11-02  5:08     ` [PATCH v3 24/28] do_pollfd(): convert to CLASS(fd) Al Viro
2024-11-02  5:08     ` [PATCH v3 25/28] assorted variants of irqfd setup: " Al Viro
2024-11-02  5:08     ` [PATCH v3 26/28] memcg_write_event_control(): switch " Al Viro
2024-11-02  5:08     ` [PATCH v3 27/28] css_set_fork(): switch to CLASS(fd_raw, ...) Al Viro
2024-11-02  5:08     ` [PATCH v3 28/28] deal with the last remaing boolean uses of fd_file() Al Viro
2024-11-02 12:21     ` [PATCH v3 01/28] net/socket.c: switch to CLASS(fd) Simon Horman
2024-11-03  6:31       ` Al Viro
2024-11-06 10:03         ` Simon Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240730051625.14349-3-viro@kernel.org \
    --to=viro@kernel.org \
    --cc=amir73il@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=brauner@kernel.org \
    --cc=cgroups@vger.kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.