linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: viro@kernel.org
To: linux-fsdevel@vger.kernel.org
Cc: amir73il@gmail.com, bpf@vger.kernel.org, brauner@kernel.org,
	cgroups@vger.kernel.org, kvm@vger.kernel.org,
	netdev@vger.kernel.org, torvalds@linux-foundation.org
Subject: [PATCH 03/39] struct fd: representation change
Date: Tue, 30 Jul 2024 01:15:49 -0400	[thread overview]
Message-ID: <20240730051625.14349-3-viro@kernel.org> (raw)
In-Reply-To: <20240730051625.14349-1-viro@kernel.org>

From: Al Viro <viro@zeniv.linux.org.uk>

	The absolute majority of instances comes from fdget() and its
relatives; the underlying primitives actually return a struct file
reference and a couple of flags encoded into an unsigned long - the lower
two bits of file address are always zero, so we can stash the flags
into those.  On the way out we use __to_fd() to unpack that unsigned
long into struct fd.

	Let's use that representation for struct fd itself - make it
a structure with a single unsigned long member (.word), with the value
equal either to (unsigned long)p | flags, p being an address of some
struct file instance, or to 0 for an empty fd.

	Note that we never used a struct fd instance with NULL ->file
and non-zero ->flags; the emptiness had been checked as (!f.file) and
we expected e.g. fdput(empty) to be a no-op.  With new representation
we can use (!f.word) for emptiness check; that is enough for compiler
to figure out that (f.word & FDPUT_FPUT) will be false and that fdput(f)
will be a no-op in such case.

	For now the new predicate (fd_empty(f)) has no users; all the
existing checks have form (!fd_file(f)).  We will convert to fd_empty()
use later; here we only define it (and tell the compiler that it's
unlikely to return true).

	This commit only deals with representation change; there will
be followups.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/infiniband/core/uverbs_cmd.c |  2 +-
 fs/overlayfs/file.c                  | 28 +++++++++++++++-------------
 fs/xfs/xfs_handle.c                  |  2 +-
 include/linux/file.h                 | 22 ++++++++++++++++------
 kernel/events/core.c                 |  2 +-
 net/socket.c                         |  2 +-
 6 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3f85575cf971..a4cce360df21 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -572,7 +572,7 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
 	struct inode                   *inode = NULL;
 	int				new_xrcd = 0;
 	struct ib_device *ib_dev;
-	struct fd f = {};
+	struct fd f = EMPTY_FD;
 	int ret;
 
 	ret = uverbs_request(attrs, &cmd, sizeof(cmd));
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index c4963d0c5549..2b7a5a3a7a2f 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -93,11 +93,11 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 			       bool allow_meta)
 {
 	struct dentry *dentry = file_dentry(file);
+	struct file *realfile = file->private_data;
 	struct path realpath;
 	int err;
 
-	real->flags = 0;
-	real->file = file->private_data;
+	real->word = (unsigned long)realfile;
 
 	if (allow_meta) {
 		ovl_path_real(dentry, &realpath);
@@ -113,16 +113,17 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 		return -EIO;
 
 	/* Has it been copied up since we'd opened it? */
-	if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
-		real->flags = FDPUT_FPUT;
-		real->file = ovl_open_realfile(file, &realpath);
-
-		return PTR_ERR_OR_ZERO(real->file);
+	if (unlikely(file_inode(realfile) != d_inode(realpath.dentry))) {
+		struct file *f = ovl_open_realfile(file, &realpath);
+		if (IS_ERR(f))
+			return PTR_ERR(f);
+		real->word = (unsigned long)ovl_open_realfile(file, &realpath) | FDPUT_FPUT;
+		return 0;
 	}
 
 	/* Did the flags change since open? */
-	if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
-		return ovl_change_flags(real->file, file->f_flags);
+	if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS))
+		return ovl_change_flags(realfile, file->f_flags);
 
 	return 0;
 }
@@ -130,10 +131,11 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 static int ovl_real_fdget(const struct file *file, struct fd *real)
 {
 	if (d_is_dir(file_dentry(file))) {
-		real->flags = 0;
-		real->file = ovl_dir_real_file(file, false);
-
-		return PTR_ERR_OR_ZERO(real->file);
+		struct file *f = ovl_dir_real_file(file, false);
+		if (IS_ERR(f))
+			return PTR_ERR(f);
+		real->word = (unsigned long)f;
+		return 0;
 	}
 
 	return ovl_real_fdget_meta(file, real, false);
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c
index 7bcc4f519cb8..49e5e5f04e60 100644
--- a/fs/xfs/xfs_handle.c
+++ b/fs/xfs/xfs_handle.c
@@ -85,7 +85,7 @@ xfs_find_handle(
 	int			hsize;
 	xfs_handle_t		handle;
 	struct inode		*inode;
-	struct fd		f = {NULL};
+	struct fd		f = EMPTY_FD;
 	struct path		path;
 	int			error;
 	struct xfs_inode	*ip;
diff --git a/include/linux/file.h b/include/linux/file.h
index 0f3f369f2450..bdd6e1766839 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -35,18 +35,28 @@ static inline void fput_light(struct file *file, int fput_needed)
 		fput(file);
 }
 
+/* either a reference to struct file + flags
+ * (cloned vs. borrowed, pos locked), with
+ * flags stored in lower bits of value,
+ * or empty (represented by 0).
+ */
 struct fd {
-	struct file *file;
-	unsigned int flags;
+	unsigned long word;
 };
 #define FDPUT_FPUT       1
 #define FDPUT_POS_UNLOCK 2
 
-#define fd_file(f) ((f).file)
+#define fd_file(f) ((struct file *)((f).word & ~3))
+static inline bool fd_empty(struct fd f)
+{
+	return unlikely(!f.word);
+}
+
+#define EMPTY_FD (struct fd){0}
 
 static inline void fdput(struct fd fd)
 {
-	if (fd.flags & FDPUT_FPUT)
+	if (fd.word & FDPUT_FPUT)
 		fput(fd_file(fd));
 }
 
@@ -60,7 +70,7 @@ extern void __f_unlock_pos(struct file *);
 
 static inline struct fd __to_fd(unsigned long v)
 {
-	return (struct fd){(struct file *)(v & ~3),v & 3};
+	return (struct fd){v};
 }
 
 static inline struct fd fdget(unsigned int fd)
@@ -80,7 +90,7 @@ static inline struct fd fdget_pos(int fd)
 
 static inline void fdput_pos(struct fd f)
 {
-	if (f.flags & FDPUT_POS_UNLOCK)
+	if (f.word & FDPUT_POS_UNLOCK)
 		__f_unlock_pos(fd_file(f));
 	fdput(f);
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 17b19d3e74ba..fd2ac9c7fd77 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -12474,7 +12474,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	struct perf_event_attr attr;
 	struct perf_event_context *ctx;
 	struct file *event_file = NULL;
-	struct fd group = {NULL, 0};
+	struct fd group = EMPTY_FD;
 	struct task_struct *task = NULL;
 	struct pmu *pmu;
 	int event_fd;
diff --git a/net/socket.c b/net/socket.c
index f77a42a74510..c0d4f5032374 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -559,7 +559,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 	if (fd_file(f)) {
 		sock = sock_from_file(fd_file(f));
 		if (likely(sock)) {
-			*fput_needed = f.flags & FDPUT_FPUT;
+			*fput_needed = f.word & FDPUT_FPUT;
 			return sock;
 		}
 		*err = -ENOTSOCK;
-- 
2.39.2


  parent reply	other threads:[~2024-07-30  5:14 UTC|newest]

Thread overview: 134+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-30  5:09 [PATCHSET][RFC] struct fd and memory safety Al Viro
2024-07-30  5:15 ` [PATCH 01/39] memcg_write_event_control(): fix a user-triggerable oops viro
2024-07-30  5:15   ` [PATCH 02/39] introduce fd_file(), convert all accessors to it viro
2024-08-07  9:55     ` Christian Brauner
2024-07-30  5:15   ` viro [this message]
2024-07-30 18:10     ` [PATCH 03/39] struct fd: representation change Josef Bacik
2024-08-07 10:07       ` Christian Brauner
2024-08-07 10:03     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 04/39] add struct fd constructors, get rid of __to_fd() viro
2024-08-07 10:09     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 05/39] regularize emptiness checks in fini_module(2) and vfs_dedupe_file_range() viro
2024-08-07 10:10     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 06/39] net/socket.c: switch to CLASS(fd) viro
2024-08-07 10:13     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 07/39] introduce struct fderr, convert overlayfs uses to that viro
2024-07-30  5:15   ` [PATCH 08/39] experimental: convert fs/overlayfs/file.c to CLASS(...) viro
2024-07-30 19:10     ` Josef Bacik
2024-07-30 21:12       ` Al Viro
2024-07-31 21:11         ` Josef Bacik
2024-08-07 10:23     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 09/39] timerfd: switch to CLASS(fd, ...) viro
2024-08-07 10:24     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 10/39] get rid of perf_fget_light(), convert kernel/events/core.c to CLASS(fd) viro
2024-08-07 10:25     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 11/39] switch netlink_getsockbyfilp() to taking descriptor viro
2024-08-07 10:26     ` Christian Brauner
2024-07-30  5:15   ` [PATCH 12/39] do_mq_notify(): saner skb freeing on failures viro
2024-07-30  5:15   ` [PATCH 13/39] do_mq_notify(): switch to CLASS(fd, ...) viro
2024-08-07 10:27     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 14/39] simplify xfs_find_handle() a bit viro
2024-07-30  5:16   ` [PATCH 15/39] convert vmsplice() to CLASS(fd, ...) viro
2024-08-07 10:27     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 16/39] convert __bpf_prog_get() " viro
2024-08-06 21:08     ` Andrii Nakryiko
2024-08-07 10:28     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 17/39] bpf: resolve_pseudo_ldimm64(): take handling of a single ldimm64 insn into helper viro
2024-08-06 22:32     ` Andrii Nakryiko
2024-08-07 10:29       ` Christian Brauner
2024-08-07 15:30         ` Andrii Nakryiko
2024-08-08 16:51           ` Alexei Starovoitov
2024-08-08 20:35             ` Andrii Nakryiko
2024-08-09  1:23               ` Alexei Starovoitov
2024-08-09 17:23                 ` Andrii Nakryiko
2024-08-10  3:29             ` Al Viro
2024-08-12 20:05               ` Andrii Nakryiko
2024-08-13  2:06                 ` Al Viro
2024-08-13  3:32                   ` Andrii Nakryiko
2024-07-30  5:16   ` [PATCH 18/39] bpf maps: switch to CLASS(fd, ...) viro
2024-08-07 10:34     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 19/39] fdget_raw() users: switch to CLASS(fd_raw, ...) viro
2024-08-07 10:35     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 20/39] introduce "fd_pos" class, convert fdget_pos() users to it viro
2024-08-07 10:36     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 21/39] o2hb_region_dev_store(): avoid goto around fdget()/fdput() viro
2024-07-30  5:16   ` [PATCH 22/39] privcmd_ioeventfd_assign(): don't open-code eventfd_ctx_fdget() viro
2024-07-30  5:16   ` [PATCH 23/39] fdget(), trivial conversions viro
2024-08-07 10:37     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 24/39] fdget(), more " viro
2024-08-07 10:39     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 25/39] convert do_preadv()/do_pwritev() viro
2024-08-07 10:39     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 26/39] convert cachestat(2) viro
2024-08-07 10:39     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 27/39] switch spufs_calls_{get,put}() to CLASS() use viro
2024-07-30  5:16   ` [PATCH 28/39] convert spu_run(2) viro
2024-08-07 10:40     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 29/39] convert media_request_get_by_fd() viro
2024-08-07 10:40     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 30/39] convert coda_parse_fd() viro
2024-08-07 10:41     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 31/39] convert cifs_ioctl_copychunk() viro
2024-08-07 10:41     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 32/39] convert vfs_dedupe_file_range() viro
2024-08-07 10:42     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 33/39] convert do_select() viro
2024-08-07 10:42     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 34/39] do_pollfd(): convert to CLASS(fd) viro
2024-08-07 10:43     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 35/39] convert bpf_token_create() viro
2024-08-06 22:42     ` Andrii Nakryiko
2024-08-10  3:46       ` Al Viro
2024-08-12 20:06         ` Andrii Nakryiko
2024-08-07 10:44     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 36/39] assorted variants of irqfd setup: convert to CLASS(fd) viro
2024-08-07 10:46     ` Christian Brauner
2024-08-10  3:53       ` Al Viro
2024-07-30  5:16   ` [PATCH 37/39] memcg_write_event_control(): switch " viro
2024-08-07 10:47     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 38/39] css_set_fork(): switch to CLASS(fd_raw, ...) viro
2024-08-07 10:47     ` Christian Brauner
2024-07-30  5:16   ` [PATCH 39/39] deal with the last remaing boolean uses of fd_file() viro
2024-08-07 10:48     ` Christian Brauner
2024-07-30  7:13   ` [PATCH 01/39] memcg_write_event_control(): fix a user-triggerable oops Michal Hocko
2024-07-30  7:18     ` Al Viro
2024-07-30  7:37       ` Michal Hocko
2024-07-30  5:17 ` [PATCHSET][RFC] struct fd and memory safety Al Viro
2024-07-30 20:02 ` Josef Bacik
2024-07-31  0:43 ` Al Viro
2024-08-06 17:58 ` Jason Gunthorpe
2024-08-06 18:56   ` Al Viro
2024-08-07 10:51 ` Christian Brauner
2024-11-02  5:02 ` [PATCHSET v3] " Al Viro
2024-11-02  5:07   ` [PATCH v3 01/28] net/socket.c: switch to CLASS(fd) Al Viro
2024-11-02  5:08     ` [PATCH v3 02/28] regularize emptiness checks in fini_module(2) and vfs_dedupe_file_range() Al Viro
2024-11-02  5:08     ` [PATCH v3 03/28] timerfd: switch to CLASS(fd) Al Viro
2024-11-02  5:08     ` [PATCH v3 04/28] get rid of perf_fget_light(), convert kernel/events/core.c " Al Viro
2024-11-02  5:08     ` [PATCH v3 05/28] switch netlink_getsockbyfilp() to taking descriptor Al Viro
2024-11-02  5:08     ` [PATCH v3 06/28] do_mq_notify(): saner skb freeing on failures Al Viro
2024-11-02  5:08     ` [PATCH v3 07/28] do_mq_notify(): switch to CLASS(fd) Al Viro
2024-11-02  5:08     ` [PATCH v3 08/28] simplify xfs_find_handle() a bit Al Viro
2024-11-02  5:08     ` [PATCH v3 09/28] convert vmsplice() to CLASS(fd) Al Viro
2024-11-02  5:08     ` [PATCH v3 10/28] fdget_raw() users: switch to CLASS(fd_raw) Al Viro
2024-11-02  5:08     ` [PATCH v3 11/28] introduce "fd_pos" class, convert fdget_pos() users to it Al Viro
2024-11-02  5:08     ` [PATCH v3 12/28] o2hb_region_dev_store(): avoid goto around fdget()/fdput() Al Viro
2024-11-02  5:08     ` [PATCH v3 13/28] privcmd_ioeventfd_assign(): don't open-code eventfd_ctx_fdget() Al Viro
2024-11-02  5:08     ` [PATCH v3 14/28] fdget(), trivial conversions Al Viro
2024-11-11 17:22       ` Francesco Lavra
2024-11-02  5:08     ` [PATCH v3 15/28] fdget(), more " Al Viro
2024-11-02  5:08     ` [PATCH v3 16/28] convert do_preadv()/do_pwritev() Al Viro
2024-11-02  5:08     ` [PATCH v3 17/28] convert cachestat(2) Al Viro
2024-11-02  5:08     ` [PATCH v3 18/28] switch spufs_calls_{get,put}() to CLASS() use Al Viro
2024-11-02  5:08     ` [PATCH v3 19/28] convert spu_run(2) Al Viro
2024-11-02  5:08     ` [PATCH v3 20/28] convert media_request_get_by_fd() Al Viro
2024-11-02  5:08     ` [PATCH v3 21/28] convert cifs_ioctl_copychunk() Al Viro
2024-11-02  5:08     ` [PATCH v3 22/28] convert vfs_dedupe_file_range() Al Viro
2024-11-02  5:08     ` [PATCH v3 23/28] convert do_select() Al Viro
2024-11-02  5:08     ` [PATCH v3 24/28] do_pollfd(): convert to CLASS(fd) Al Viro
2024-11-02  5:08     ` [PATCH v3 25/28] assorted variants of irqfd setup: " Al Viro
2024-11-02  5:08     ` [PATCH v3 26/28] memcg_write_event_control(): switch " Al Viro
2024-11-02  5:08     ` [PATCH v3 27/28] css_set_fork(): switch to CLASS(fd_raw, ...) Al Viro
2024-11-02  5:08     ` [PATCH v3 28/28] deal with the last remaing boolean uses of fd_file() Al Viro
2024-11-02 12:21     ` [PATCH v3 01/28] net/socket.c: switch to CLASS(fd) Simon Horman
2024-11-03  6:31       ` Al Viro
2024-11-06 10:03         ` Simon Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240730051625.14349-3-viro@kernel.org \
    --to=viro@kernel.org \
    --cc=amir73il@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=brauner@kernel.org \
    --cc=cgroups@vger.kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).