public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] fuse: Allow to align reads/writes
@ 2024-07-02 16:31 Bernd Schubert
  2024-07-03 11:59 ` Bernd Schubert
  2024-07-03 15:15 ` Josef Bacik
  0 siblings, 2 replies; 11+ messages in thread
From: Bernd Schubert @ 2024-07-02 16:31 UTC (permalink / raw)
  To: miklos; +Cc: linux-fsdevel, bernd.schubert, Bernd Schubert

Read/write IOs should be page aligned, as otherwise the fuse
server might need to copy data to another buffer in order to
fulfill network or device storage requirements.

Simple reproducer is with libfuse, example/passthrough*
and opening a file with O_DIRECT - without this change
writing to that file failed with -EINVAL if the underlying
file system was using ext4 (for passthrough_hp the
'passthrough' feature has to be disabled).

Given that this needs server side changes, a new feature flag
is introduced.

Disadvantage of aligned writes is that the server side needs
another splice syscall (when splice is used) to seek over the
unaligned area - i.e. syscall and memory copy overhead.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>

---
From an implementation point of view 'struct fuse_in_arg' /
'struct fuse_arg' get another member 'align_size', which has to
be set by fuse_write_args_fill. For all other fuse operations this
member has to be 0, which is guaranteed by the existing
initialization via FUSE_ARGS and C99 style
initialization { .size = 0, .value = NULL }, i.e. other members are
zero.
Another choice would have been to extend fuse_write_in to
PAGE_SIZE - sizeof(fuse_in_header), but then it would be an
arch/PAGE_SIZE dependent struct size and would also require
lots of stack usage.
---
 fs/fuse/dev.c             | 21 +++++++++++++++++++--
 fs/fuse/file.c            | 12 ++++++++++++
 fs/fuse/fuse_i.h          |  9 +++++++--
 fs/fuse/inode.c           |  5 ++++-
 include/uapi/linux/fuse.h | 13 +++++++++++--
 5 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 9eb191b5c4de..a13793507d0b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1009,6 +1009,20 @@ static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
 	return 0;
 }
 
+static int fuse_copy_align(struct fuse_copy_state *cs, unsigned int align_size)
+{
+	/* Might happen if fuse-server does not use page aligned buffers */
+	if (cs->len < align_size) {
+		pr_info("Remaining cs->len (%u) too small for alignment (%u)\n",
+			cs->len, align_size);
+		return -EINVAL;
+	}
+	cs->len -= align_size;
+	cs->offset += align_size;
+
+	return 0;
+}
+
 /* Copy request arguments to/from userspace buffer */
 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
 			  unsigned argpages, struct fuse_arg *args,
@@ -1019,10 +1033,13 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
 
 	for (i = 0; !err && i < numargs; i++)  {
 		struct fuse_arg *arg = &args[i];
-		if (i == numargs - 1 && argpages)
+		if (i == numargs - 1 && argpages) {
 			err = fuse_copy_pages(cs, arg->size, zeroing);
-		else
+		} else {
 			err = fuse_copy_one(cs, arg->value, arg->size);
+			if (!err && arg->align_size)
+				err = fuse_copy_align(cs, arg->align_size);
+		}
 	}
 	return err;
 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f39456c65ed7..0e1c540c6139 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1062,6 +1062,18 @@ static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
 		args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
 	else
 		args->in_args[0].size = sizeof(ia->write.in);
+
+	if (ff->fm->fc->align_writes) {
+		/*
+		 * add an extra alignment offset after the fuse header to
+		 * the next page
+		 */
+		args->in_args[0].align_size = PAGE_SIZE -
+					      sizeof(struct fuse_in_header) -
+					      sizeof(ia->write.in);
+		ia->write.in.align_size = args->in_args[0].align_size;
+	}
+
 	args->in_args[0].value = &ia->write.in;
 	args->in_args[1].size = count;
 	args->out_numargs = 1;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index f23919610313..cb15153c6785 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -275,13 +275,15 @@ struct fuse_file {
 
 /** One input argument of a request */
 struct fuse_in_arg {
-	unsigned size;
+	unsigned int size;
+	unsigned int align_size;
 	const void *value;
 };
 
 /** One output argument of a request */
 struct fuse_arg {
-	unsigned size;
+	unsigned int size;
+	unsigned int align_size;
 	void *value;
 };
 
@@ -860,6 +862,9 @@ struct fuse_conn {
 	/** Passthrough support for read/write IO */
 	unsigned int passthrough:1;
 
+	/** Should (write) data be page aligned? */
+	unsigned int align_writes:1;
+
 	/** Maximum stack depth for passthrough backing files */
 	int max_stack_depth;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 99e44ea7d875..e8b42859f553 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1331,6 +1331,9 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
 			}
 			if (flags & FUSE_NO_EXPORT_SUPPORT)
 				fm->sb->s_export_op = &fuse_export_fid_operations;
+
+			if (flags & FUSE_ALIGN_WRITES)
+				fc->align_writes = 1;
 		} else {
 			ra_pages = fc->max_read / PAGE_SIZE;
 			fc->no_lock = 1;
@@ -1378,7 +1381,7 @@ void fuse_send_init(struct fuse_mount *fm)
 		FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
 		FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
 		FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
-		FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND;
+		FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALIGN_WRITES;
 #ifdef CONFIG_FUSE_DAX
 	if (fm->fc->dax)
 		flags |= FUSE_MAP_ALIGNMENT;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index d08b99d60f6f..4f5ddd7fe9b4 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -217,6 +217,11 @@
  *  - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag
  *  - add FUSE_NO_EXPORT_SUPPORT init flag
  *  - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag
+ *
+ * 7.41
+ *  - add FUSE_ALIGN_WRITES init flag
+ *  - make use of padding in struct fuse_write_in when
+ *    initialization agrees on aligned writes
  */
 
 #ifndef _LINUX_FUSE_H
@@ -252,7 +257,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 40
+#define FUSE_KERNEL_MINOR_VERSION 41
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -421,6 +426,8 @@ struct fuse_file_lock {
  * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support
  * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit
  *		    of the request ID indicates resend requests
+ * FUSE_ALIGN_WRITES: For opcode FUSE_WRITE,  data follow the headers with a
+ *		      page aligned offset
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -463,6 +470,7 @@ struct fuse_file_lock {
 #define FUSE_PASSTHROUGH	(1ULL << 37)
 #define FUSE_NO_EXPORT_SUPPORT	(1ULL << 38)
 #define FUSE_HAS_RESEND		(1ULL << 39)
+#define FUSE_ALIGN_WRITES	(1ULL << 40)
 
 /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
 #define FUSE_DIRECT_IO_RELAX	FUSE_DIRECT_IO_ALLOW_MMAP
@@ -496,6 +504,7 @@ struct fuse_file_lock {
  * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
  * FUSE_WRITE_LOCKOWNER: lock_owner field is valid
  * FUSE_WRITE_KILL_SUIDGID: kill suid and sgid bits
+ * FUSE_WRITE_ALIGNED: Data are at an page size aligned offset
  */
 #define FUSE_WRITE_CACHE	(1 << 0)
 #define FUSE_WRITE_LOCKOWNER	(1 << 1)
@@ -812,7 +821,7 @@ struct fuse_write_in {
 	uint32_t	write_flags;
 	uint64_t	lock_owner;
 	uint32_t	flags;
-	uint32_t	padding;
+	uint32_t	align_size; /* extra alignment offset to the next page */
 };
 
 struct fuse_write_out {
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-07-04 15:49 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-02 16:31 [PATCH] fuse: Allow to align reads/writes Bernd Schubert
2024-07-03 11:59 ` Bernd Schubert
2024-07-03 15:15 ` Josef Bacik
2024-07-03 15:58   ` Bernd Schubert
2024-07-03 17:30     ` Josef Bacik
2024-07-03 17:49       ` Joanne Koong
2024-07-03 18:07         ` Bernd Schubert
2024-07-03 20:28           ` Joanne Koong
2024-07-03 20:44             ` Bernd Schubert
2024-07-04 15:10               ` Josef Bacik
2024-07-04 15:49                 ` Bernd Schubert

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox