public inbox for linux-unionfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/5] Support nested overlayfs mounts
@ 2023-08-17 11:05 Alexander Larsson
  2023-08-17 11:05 ` [PATCH v2 1/5] ovl: Move xattr support to new xattrs.c file Alexander Larsson
                   ` (5 more replies)
  0 siblings, 6 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-17 11:05 UTC (permalink / raw)
  To: miklos; +Cc: linux-unionfs, amir73il, Alexander Larsson

There are cases where you want to use an overlayfs mount as a lowerdir for
another overlayfs mount. For example, if the system rootfs is on overlayfs due
to composefs, or to make it volatile (via tmps), then you cannot currently store
a lowerdir on the rootfs, becasue the inner overlayfs will eat all the whiteouts
and overlay xattrs. This means you can't e.g. store on the rootfs a prepared
container image for use using overlayfs.

This patch series adds support for nesting of overlayfs mounts by escaping the
problematic features on and unescaping them when exposing to the overlayfs user.

This series is also available here:
  https://github.com/alexlarsson/linux/tree/ovl-nesting

And xfstest to test it is available here:
  https://github.com/alexlarsson/xfstests/tree/overlayfs-nesting

Changes since v1:

 * Moved all xattr handling to xattr.c
 * Made creation of escaped whiteouts atomic

Alexander Larsson (5):
  ovl: Move xattr support to new xattrs.c file
  ovl: Add OVL_XATTR_TRUSTED/USER_PREFIX_LEN macros
  ovl: Support escaped overlay.* xattrs
  ovl: Support creation of whiteout files on overlayfs
  ovl: Add documentation on nesting of overlayfs mounts

 Documentation/filesystems/overlayfs.rst |  22 ++
 fs/overlayfs/Makefile                   |   2 +-
 fs/overlayfs/dir.c                      |  24 ++-
 fs/overlayfs/inode.c                    | 124 -----------
 fs/overlayfs/namei.c                    |  14 +-
 fs/overlayfs/overlayfs.h                |  40 +++-
 fs/overlayfs/readdir.c                  |   7 +-
 fs/overlayfs/super.c                    |  67 +-----
 fs/overlayfs/util.c                     |  20 ++
 fs/overlayfs/xattrs.c                   | 268 ++++++++++++++++++++++++
 10 files changed, 376 insertions(+), 212 deletions(-)
 create mode 100644 fs/overlayfs/xattrs.c

-- 
2.41.0


^ permalink raw reply	[flat|nested] 47+ messages in thread

* [PATCH v2 1/5] ovl: Move xattr support to new xattrs.c file
  2023-08-17 11:05 [PATCH v2 0/5] Support nested overlayfs mounts Alexander Larsson
@ 2023-08-17 11:05 ` Alexander Larsson
  2023-08-17 12:48   ` Amir Goldstein
  2023-08-17 11:05 ` [PATCH v2 2/5] ovl: Add OVL_XATTR_TRUSTED/USER_PREFIX_LEN macros Alexander Larsson
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-17 11:05 UTC (permalink / raw)
  To: miklos; +Cc: linux-unionfs, amir73il, Alexander Larsson

This moves the code from from super.c and inode.c, and makes
ovl_xattr_get/set() static.

This is in preparation for doing more work on xattrs support.

Signed-off-by: Alexander Larsson <alexl@redhat.com>
---
 fs/overlayfs/Makefile    |   2 +-
 fs/overlayfs/inode.c     | 124 ------------------------
 fs/overlayfs/overlayfs.h |  18 ++--
 fs/overlayfs/super.c     |  65 +------------
 fs/overlayfs/xattrs.c    | 198 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 209 insertions(+), 198 deletions(-)
 create mode 100644 fs/overlayfs/xattrs.c

diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 4e173d56b11f..5648954f8588 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -6,4 +6,4 @@
 obj-$(CONFIG_OVERLAY_FS) += overlay.o
 
 overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
-		copy_up.o export.o params.o
+		copy_up.o export.o params.o xattrs.o
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b395cd84bfce..375edf832641 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -339,130 +339,6 @@ static const char *ovl_get_link(struct dentry *dentry,
 	return p;
 }
 
-bool ovl_is_private_xattr(struct super_block *sb, const char *name)
-{
-	struct ovl_fs *ofs = OVL_FS(sb);
-
-	if (ofs->config.userxattr)
-		return strncmp(name, OVL_XATTR_USER_PREFIX,
-			       sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
-	else
-		return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
-			       sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
-}
-
-int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
-		  const void *value, size_t size, int flags)
-{
-	int err;
-	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
-	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
-	struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
-	struct path realpath;
-	const struct cred *old_cred;
-
-	err = ovl_want_write(dentry);
-	if (err)
-		goto out;
-
-	if (!value && !upperdentry) {
-		ovl_path_lower(dentry, &realpath);
-		old_cred = ovl_override_creds(dentry->d_sb);
-		err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0);
-		revert_creds(old_cred);
-		if (err < 0)
-			goto out_drop_write;
-	}
-
-	if (!upperdentry) {
-		err = ovl_copy_up(dentry);
-		if (err)
-			goto out_drop_write;
-
-		realdentry = ovl_dentry_upper(dentry);
-	}
-
-	old_cred = ovl_override_creds(dentry->d_sb);
-	if (value) {
-		err = ovl_do_setxattr(ofs, realdentry, name, value, size,
-				      flags);
-	} else {
-		WARN_ON(flags != XATTR_REPLACE);
-		err = ovl_do_removexattr(ofs, realdentry, name);
-	}
-	revert_creds(old_cred);
-
-	/* copy c/mtime */
-	ovl_copyattr(inode);
-
-out_drop_write:
-	ovl_drop_write(dentry);
-out:
-	return err;
-}
-
-int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
-		  void *value, size_t size)
-{
-	ssize_t res;
-	const struct cred *old_cred;
-	struct path realpath;
-
-	ovl_i_path_real(inode, &realpath);
-	old_cred = ovl_override_creds(dentry->d_sb);
-	res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size);
-	revert_creds(old_cred);
-	return res;
-}
-
-static bool ovl_can_list(struct super_block *sb, const char *s)
-{
-	/* Never list private (.overlay) */
-	if (ovl_is_private_xattr(sb, s))
-		return false;
-
-	/* List all non-trusted xattrs */
-	if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
-		return true;
-
-	/* list other trusted for superuser only */
-	return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
-}
-
-ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
-{
-	struct dentry *realdentry = ovl_dentry_real(dentry);
-	ssize_t res;
-	size_t len;
-	char *s;
-	const struct cred *old_cred;
-
-	old_cred = ovl_override_creds(dentry->d_sb);
-	res = vfs_listxattr(realdentry, list, size);
-	revert_creds(old_cred);
-	if (res <= 0 || size == 0)
-		return res;
-
-	/* filter out private xattrs */
-	for (s = list, len = res; len;) {
-		size_t slen = strnlen(s, len) + 1;
-
-		/* underlying fs providing us with an broken xattr list? */
-		if (WARN_ON(slen > len))
-			return -EIO;
-
-		len -= slen;
-		if (!ovl_can_list(dentry->d_sb, s)) {
-			res -= slen;
-			memmove(s, s + slen, len);
-		} else {
-			s += slen;
-		}
-	}
-
-	return res;
-}
-
 #ifdef CONFIG_FS_POSIX_ACL
 /*
  * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 72f57d919aa9..1283b7126b94 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -684,17 +684,8 @@ int ovl_set_nlink_lower(struct dentry *dentry);
 unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
 			   struct dentry *upperdentry,
 			   unsigned int fallback);
-int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
-		struct iattr *attr);
-int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
-		struct kstat *stat, u32 request_mask, unsigned int flags);
 int ovl_permission(struct mnt_idmap *idmap, struct inode *inode,
 		   int mask);
-int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
-		  const void *value, size_t size, int flags);
-int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
-		  void *value, size_t size);
-ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
 
 #ifdef CONFIG_FS_POSIX_ACL
 struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap,
@@ -830,3 +821,12 @@ static inline bool ovl_force_readonly(struct ovl_fs *ofs)
 {
 	return (!ovl_upper_mnt(ofs) || !ofs->workdir);
 }
+
+/* xattr.c */
+
+const struct xattr_handler **ovl_xattr_handlers(struct ovl_fs *ofs);
+int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+		struct iattr *attr);
+int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
+		struct kstat *stat, u32 request_mask, unsigned int flags);
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index def266b5e2a3..a3be13306c73 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -434,68 +434,6 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
 	return ok;
 }
 
-static int ovl_own_xattr_get(const struct xattr_handler *handler,
-			     struct dentry *dentry, struct inode *inode,
-			     const char *name, void *buffer, size_t size)
-{
-	return -EOPNOTSUPP;
-}
-
-static int ovl_own_xattr_set(const struct xattr_handler *handler,
-			     struct mnt_idmap *idmap,
-			     struct dentry *dentry, struct inode *inode,
-			     const char *name, const void *value,
-			     size_t size, int flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static int ovl_other_xattr_get(const struct xattr_handler *handler,
-			       struct dentry *dentry, struct inode *inode,
-			       const char *name, void *buffer, size_t size)
-{
-	return ovl_xattr_get(dentry, inode, name, buffer, size);
-}
-
-static int ovl_other_xattr_set(const struct xattr_handler *handler,
-			       struct mnt_idmap *idmap,
-			       struct dentry *dentry, struct inode *inode,
-			       const char *name, const void *value,
-			       size_t size, int flags)
-{
-	return ovl_xattr_set(dentry, inode, name, value, size, flags);
-}
-
-static const struct xattr_handler ovl_own_trusted_xattr_handler = {
-	.prefix	= OVL_XATTR_TRUSTED_PREFIX,
-	.get = ovl_own_xattr_get,
-	.set = ovl_own_xattr_set,
-};
-
-static const struct xattr_handler ovl_own_user_xattr_handler = {
-	.prefix	= OVL_XATTR_USER_PREFIX,
-	.get = ovl_own_xattr_get,
-	.set = ovl_own_xattr_set,
-};
-
-static const struct xattr_handler ovl_other_xattr_handler = {
-	.prefix	= "", /* catch all */
-	.get = ovl_other_xattr_get,
-	.set = ovl_other_xattr_set,
-};
-
-static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
-	&ovl_own_trusted_xattr_handler,
-	&ovl_other_xattr_handler,
-	NULL
-};
-
-static const struct xattr_handler *ovl_user_xattr_handlers[] = {
-	&ovl_own_user_xattr_handler,
-	&ovl_other_xattr_handler,
-	NULL
-};
-
 static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
 			  struct inode **ptrap, const char *name)
 {
@@ -1478,8 +1416,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
 	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
 
 	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
-	sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
-		ovl_trusted_xattr_handlers;
+	sb->s_xattr = ovl_xattr_handlers(ofs);
 	sb->s_fs_info = ofs;
 	sb->s_flags |= SB_POSIXACL;
 	sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE;
diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c
new file mode 100644
index 000000000000..edc7cc49a7c4
--- /dev/null
+++ b/fs/overlayfs/xattrs.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/fs.h>
+#include <linux/xattr.h>
+#include "overlayfs.h"
+
+bool ovl_is_private_xattr(struct super_block *sb, const char *name)
+{
+	struct ovl_fs *ofs = OVL_FS(sb);
+
+	if (ofs->config.userxattr)
+		return strncmp(name, OVL_XATTR_USER_PREFIX,
+			       sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
+	else
+		return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
+			       sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
+}
+
+static int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+			 const void *value, size_t size, int flags)
+{
+	int err;
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+	struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
+	struct path realpath;
+	const struct cred *old_cred;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	if (!value && !upperdentry) {
+		ovl_path_lower(dentry, &realpath);
+		old_cred = ovl_override_creds(dentry->d_sb);
+		err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0);
+		revert_creds(old_cred);
+		if (err < 0)
+			goto out_drop_write;
+	}
+
+	if (!upperdentry) {
+		err = ovl_copy_up(dentry);
+		if (err)
+			goto out_drop_write;
+
+		realdentry = ovl_dentry_upper(dentry);
+	}
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	if (value) {
+		err = ovl_do_setxattr(ofs, realdentry, name, value, size,
+				      flags);
+	} else {
+		WARN_ON(flags != XATTR_REPLACE);
+		err = ovl_do_removexattr(ofs, realdentry, name);
+	}
+	revert_creds(old_cred);
+
+	/* copy c/mtime */
+	ovl_copyattr(inode);
+
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
+			 void *value, size_t size)
+{
+	ssize_t res;
+	const struct cred *old_cred;
+	struct path realpath;
+
+	ovl_i_path_real(inode, &realpath);
+	old_cred = ovl_override_creds(dentry->d_sb);
+	res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size);
+	revert_creds(old_cred);
+	return res;
+}
+
+static bool ovl_can_list(struct super_block *sb, const char *s)
+{
+	/* Never list private (.overlay) */
+	if (ovl_is_private_xattr(sb, s))
+		return false;
+
+	/* List all non-trusted xattrs */
+	if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
+		return true;
+
+	/* list other trusted for superuser only */
+	return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+}
+
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+	struct dentry *realdentry = ovl_dentry_real(dentry);
+	ssize_t res;
+	size_t len;
+	char *s;
+	const struct cred *old_cred;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	res = vfs_listxattr(realdentry, list, size);
+	revert_creds(old_cred);
+	if (res <= 0 || size == 0)
+		return res;
+
+	/* filter out private xattrs */
+	for (s = list, len = res; len;) {
+		size_t slen = strnlen(s, len) + 1;
+
+		/* underlying fs providing us with an broken xattr list? */
+		if (WARN_ON(slen > len))
+			return -EIO;
+
+		len -= slen;
+		if (!ovl_can_list(dentry->d_sb, s)) {
+			res -= slen;
+			memmove(s, s + slen, len);
+		} else {
+			s += slen;
+		}
+	}
+
+	return res;
+}
+
+static int ovl_own_xattr_get(const struct xattr_handler *handler,
+			     struct dentry *dentry, struct inode *inode,
+			     const char *name, void *buffer, size_t size)
+{
+	return -EOPNOTSUPP;
+}
+
+static int ovl_own_xattr_set(const struct xattr_handler *handler,
+			     struct mnt_idmap *idmap,
+			     struct dentry *dentry, struct inode *inode,
+			     const char *name, const void *value,
+			     size_t size, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static int ovl_other_xattr_get(const struct xattr_handler *handler,
+			       struct dentry *dentry, struct inode *inode,
+			       const char *name, void *buffer, size_t size)
+{
+	return ovl_xattr_get(dentry, inode, name, buffer, size);
+}
+
+static int ovl_other_xattr_set(const struct xattr_handler *handler,
+			       struct mnt_idmap *idmap,
+			       struct dentry *dentry, struct inode *inode,
+			       const char *name, const void *value,
+			       size_t size, int flags)
+{
+	return ovl_xattr_set(dentry, inode, name, value, size, flags);
+}
+
+static const struct xattr_handler ovl_own_trusted_xattr_handler = {
+	.prefix	= OVL_XATTR_TRUSTED_PREFIX,
+	.get = ovl_own_xattr_get,
+	.set = ovl_own_xattr_set,
+};
+
+static const struct xattr_handler ovl_own_user_xattr_handler = {
+	.prefix	= OVL_XATTR_USER_PREFIX,
+	.get = ovl_own_xattr_get,
+	.set = ovl_own_xattr_set,
+};
+
+static const struct xattr_handler ovl_other_xattr_handler = {
+	.prefix	= "", /* catch all */
+	.get = ovl_other_xattr_get,
+	.set = ovl_other_xattr_set,
+};
+
+static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
+	&ovl_own_trusted_xattr_handler,
+	&ovl_other_xattr_handler,
+	NULL
+};
+
+static const struct xattr_handler *ovl_user_xattr_handlers[] = {
+	&ovl_own_user_xattr_handler,
+	&ovl_other_xattr_handler,
+	NULL
+};
+
+const struct xattr_handler **ovl_xattr_handlers(struct ovl_fs *ofs)
+{
+	return ofs->config.userxattr ? ovl_user_xattr_handlers :
+		ovl_trusted_xattr_handlers;
+}
+
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v2 2/5] ovl: Add OVL_XATTR_TRUSTED/USER_PREFIX_LEN macros
  2023-08-17 11:05 [PATCH v2 0/5] Support nested overlayfs mounts Alexander Larsson
  2023-08-17 11:05 ` [PATCH v2 1/5] ovl: Move xattr support to new xattrs.c file Alexander Larsson
@ 2023-08-17 11:05 ` Alexander Larsson
  2023-08-17 12:48   ` Amir Goldstein
  2023-08-17 11:05 ` [PATCH v2 3/5] ovl: Support escaped overlay.* xattrs Alexander Larsson
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-17 11:05 UTC (permalink / raw)
  To: miklos; +Cc: linux-unionfs, amir73il, Alexander Larsson

These match the ones for e.g. XATTR_TRUSTED_PREFIX_LEN.

Signed-off-by: Alexander Larsson <alexl@redhat.com>
---
 fs/overlayfs/overlayfs.h | 2 ++
 fs/overlayfs/xattrs.c    | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 1283b7126b94..ef993a543b2a 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -28,7 +28,9 @@ enum ovl_path_type {
 
 #define OVL_XATTR_NAMESPACE "overlay."
 #define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
+#define OVL_XATTR_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1)
 #define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
+#define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1)
 
 enum ovl_xattr {
 	OVL_XATTR_OPAQUE,
diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c
index edc7cc49a7c4..b8ea96606ea8 100644
--- a/fs/overlayfs/xattrs.c
+++ b/fs/overlayfs/xattrs.c
@@ -10,10 +10,10 @@ bool ovl_is_private_xattr(struct super_block *sb, const char *name)
 
 	if (ofs->config.userxattr)
 		return strncmp(name, OVL_XATTR_USER_PREFIX,
-			       sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
+			       OVL_XATTR_USER_PREFIX_LEN) == 0;
 	else
 		return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
-			       sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
+			       OVL_XATTR_TRUSTED_PREFIX_LEN) == 0;
 }
 
 static int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v2 3/5] ovl: Support escaped overlay.* xattrs
  2023-08-17 11:05 [PATCH v2 0/5] Support nested overlayfs mounts Alexander Larsson
  2023-08-17 11:05 ` [PATCH v2 1/5] ovl: Move xattr support to new xattrs.c file Alexander Larsson
  2023-08-17 11:05 ` [PATCH v2 2/5] ovl: Add OVL_XATTR_TRUSTED/USER_PREFIX_LEN macros Alexander Larsson
@ 2023-08-17 11:05 ` Alexander Larsson
  2023-08-17 14:31   ` Amir Goldstein
  2023-08-17 11:05 ` [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs Alexander Larsson
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-17 11:05 UTC (permalink / raw)
  To: miklos; +Cc: linux-unionfs, amir73il, Alexander Larsson

There are cases where you want to use an overlayfs mount as a lowerdir
for another overlayfs mount. For example, if the system rootfs is on
overlayfs due to composefs, or to make it volatile (via tmps), then
you cannot currently store a lowerdir on the rootfs. This means you
can't e.g. store on the rootfs a prepared container image for use
using overlayfs.

To work around this, we introduce an escapment mechanism for overlayfs
xattrs. Whenever the lower/upper dir has a xattr named
`overlay.overlay.XYZ`, we list it as overlay.XYZ in listxattrs, and
when the user calls getxattr or setxattr on `overlay.XYZ`, we apply to
`overlay.overlay.XYZ` in the backing directories.

This allows storing any kind of overlay xattrs in a overlayfs mount
that can be used as a lowerdir in another mount. It is possible to
stack this mechanism multiple times, such that
overlay.overlay.overlay.XYZ will survive two levels of overlay mounts,
however this is not all that useful in practice because of stack depth
limitations of overlayfs mounts.

Signed-off-by: Alexander Larsson <alexl@redhat.com>
---
 fs/overlayfs/overlayfs.h |  7 ++++
 fs/overlayfs/xattrs.c    | 78 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index ef993a543b2a..311e1f37ce84 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -32,6 +32,13 @@ enum ovl_path_type {
 #define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
 #define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1)
 
+#define OVL_XATTR_ESCAPE_PREFIX OVL_XATTR_NAMESPACE
+#define OVL_XATTR_ESCAPE_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_PREFIX) - 1)
+#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ESCAPE_PREFIX
+#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_TRUSTED_PREFIX) - 1)
+#define OVL_XATTR_ESCAPE_USER_PREFIX OVL_XATTR_USER_PREFIX OVL_XATTR_ESCAPE_PREFIX
+#define OVL_XATTR_ESCAPE_USER_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_USER_PREFIX) - 1)
+
 enum ovl_xattr {
 	OVL_XATTR_OPAQUE,
 	OVL_XATTR_REDIRECT,
diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c
index b8ea96606ea8..27b31f812eb1 100644
--- a/fs/overlayfs/xattrs.c
+++ b/fs/overlayfs/xattrs.c
@@ -4,6 +4,18 @@
 #include <linux/xattr.h>
 #include "overlayfs.h"
 
+bool ovl_is_escaped_xattr(struct super_block *sb, const char *name)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	if (ofs->config.userxattr)
+		return strncmp(name, OVL_XATTR_ESCAPE_USER_PREFIX,
+			       OVL_XATTR_ESCAPE_USER_PREFIX_LEN) == 0;
+	else
+		return strncmp(name, OVL_XATTR_ESCAPE_TRUSTED_PREFIX,
+			       OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN - 1) == 0;
+}
+
 bool ovl_is_private_xattr(struct super_block *sb, const char *name)
 {
 	struct ovl_fs *ofs = OVL_FS(sb);
@@ -82,8 +94,8 @@ static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char
 
 static bool ovl_can_list(struct super_block *sb, const char *s)
 {
-	/* Never list private (.overlay) */
-	if (ovl_is_private_xattr(sb, s))
+	/* Never list non-escaped private (.overlay) */
+	if (ovl_is_private_xattr(sb, s) && !ovl_is_escaped_xattr(sb, s))
 		return false;
 
 	/* List all non-trusted xattrs */
@@ -97,10 +109,12 @@ static bool ovl_can_list(struct super_block *sb, const char *s)
 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 {
 	struct dentry *realdentry = ovl_dentry_real(dentry);
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 	ssize_t res;
 	size_t len;
 	char *s;
 	const struct cred *old_cred;
+	size_t prefix_len;
 
 	old_cred = ovl_override_creds(dentry->d_sb);
 	res = vfs_listxattr(realdentry, list, size);
@@ -108,6 +122,9 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 	if (res <= 0 || size == 0)
 		return res;
 
+	prefix_len = ofs->config.userxattr ?
+		OVL_XATTR_USER_PREFIX_LEN : OVL_XATTR_TRUSTED_PREFIX_LEN;
+
 	/* filter out private xattrs */
 	for (s = list, len = res; len;) {
 		size_t slen = strnlen(s, len) + 1;
@@ -120,6 +137,12 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 		if (!ovl_can_list(dentry->d_sb, s)) {
 			res -= slen;
 			memmove(s, s + slen, len);
+		} else if (ovl_is_escaped_xattr(dentry->d_sb, s)) {
+			memmove(s + prefix_len,
+				s + prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN,
+				slen - (prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN) + len);
+			res -= OVL_XATTR_ESCAPE_PREFIX_LEN;
+			s += slen - OVL_XATTR_ESCAPE_PREFIX_LEN;
 		} else {
 			s += slen;
 		}
@@ -128,11 +151,47 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 	return res;
 }
 
+static char *ovl_xattr_escape_name(const char *prefix, const char *name)
+{
+	size_t prefix_len = strlen(prefix);
+	size_t name_len = strlen(name);
+	size_t escaped_len;
+	char *escaped, *s;
+
+	escaped_len = prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN + name_len;
+	if (escaped_len > XATTR_NAME_MAX)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	escaped = kmalloc(escaped_len + 1, GFP_KERNEL);
+	if (escaped == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	s = escaped;
+	memcpy(s, prefix, prefix_len);
+	s += prefix_len;
+	memcpy(s, OVL_XATTR_ESCAPE_PREFIX, OVL_XATTR_ESCAPE_PREFIX_LEN);
+	s += OVL_XATTR_ESCAPE_PREFIX_LEN;
+	memcpy(s, name, name_len + 1);
+
+	return escaped;
+}
+
 static int ovl_own_xattr_get(const struct xattr_handler *handler,
 			     struct dentry *dentry, struct inode *inode,
 			     const char *name, void *buffer, size_t size)
 {
-	return -EOPNOTSUPP;
+	char *escaped;
+	int r;
+
+	escaped = ovl_xattr_escape_name(handler->prefix, name);
+	if (IS_ERR(escaped))
+		return PTR_ERR(escaped);
+
+	r = ovl_xattr_get(dentry, inode, escaped, buffer, size);
+
+	kfree(escaped);
+
+	return r;
 }
 
 static int ovl_own_xattr_set(const struct xattr_handler *handler,
@@ -141,7 +200,18 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler,
 			     const char *name, const void *value,
 			     size_t size, int flags)
 {
-	return -EOPNOTSUPP;
+	char *escaped;
+	int r;
+
+	escaped = ovl_xattr_escape_name(handler->prefix, name);
+	if (IS_ERR(escaped))
+		return PTR_ERR(escaped);
+
+	r = ovl_xattr_set(dentry, inode, escaped, value, size, flags);
+
+	kfree(escaped);
+
+	return r;
 }
 
 static int ovl_other_xattr_get(const struct xattr_handler *handler,
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-17 11:05 [PATCH v2 0/5] Support nested overlayfs mounts Alexander Larsson
                   ` (2 preceding siblings ...)
  2023-08-17 11:05 ` [PATCH v2 3/5] ovl: Support escaped overlay.* xattrs Alexander Larsson
@ 2023-08-17 11:05 ` Alexander Larsson
  2023-08-17 13:40   ` Amir Goldstein
  2023-08-21 10:59   ` Miklos Szeredi
  2023-08-17 11:05 ` [PATCH v2 5/5] ovl: Add documentation on nesting of overlayfs mounts Alexander Larsson
  2023-08-17 14:45 ` [PATCH v2 0/5] Support nested " Amir Goldstein
  5 siblings, 2 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-17 11:05 UTC (permalink / raw)
  To: miklos; +Cc: linux-unionfs, amir73il, Alexander Larsson

This is needed to properly stack overlay filesystems, I.E, being able
to create a whiteout file on an overlay mount and then use that as
part of the lowerdir in another overlay mount.

The way this works is that we create a regular whiteout, but set the
`overlay.nowhiteout` xattr on it. Whenever we check if a file is a
whiteout we check this xattr and don't treat it as a whiteout if it is
set. The xattr itself is then stripped and when viewed as part of the
overlayfs mount it looks like a regular whiteout.

In order to make the creation of the whiteout file with xattr atomic
we always take ovl_create_over_whiteout() codepath when creating
whiteouts.

Signed-off-by: Alexander Larsson <alexl@redhat.com>
---
 fs/overlayfs/dir.c       | 24 +++++++++++++++++-------
 fs/overlayfs/namei.c     | 14 +++++++++-----
 fs/overlayfs/overlayfs.h | 13 +++++++++++++
 fs/overlayfs/readdir.c   |  7 ++++++-
 fs/overlayfs/super.c     |  2 +-
 fs/overlayfs/util.c      | 20 ++++++++++++++++++++
 6 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 033fc0458a3d..2f3442f5430d 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -442,6 +442,11 @@ static int ovl_set_upper_acl(struct ovl_fs *ofs, struct dentry *upperdentry,
 	return ovl_do_set_acl(ofs, upperdentry, acl_name, acl);
 }
 
+static bool ovl_cattr_is_whiteout(struct ovl_cattr *attr)
+{
+	return S_ISCHR(attr->mode) && attr->rdev == WHITEOUT_DEV;
+}
+
 static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
 				    struct ovl_cattr *cattr)
 {
@@ -477,7 +482,8 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
 		goto out_unlock;
 
 	err = -ESTALE;
-	if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
+	if (!ovl_cattr_is_whiteout(cattr) &&
+	    (d_is_negative(upper) || !ovl_upper_is_whiteout(ofs, upper)))
 		goto out_dput;
 
 	newdentry = ovl_create_temp(ofs, workdir, cattr);
@@ -485,6 +491,13 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
 	if (IS_ERR(newdentry))
 		goto out_dput;
 
+	if (ovl_cattr_is_whiteout(cattr)) {
+		err = ovl_setxattr(ofs, newdentry, OVL_XATTR_NOWHITEOUT,
+				   NULL, 0);
+		if (err < 0)
+			goto out_cleanup;
+	}
+
 	/*
 	 * mode could have been mutilated due to umask (e.g. sgid directory)
 	 */
@@ -606,7 +619,8 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
 		put_cred(override_cred);
 	}
 
-	if (!ovl_dentry_is_whiteout(dentry))
+	/* Create whiteouts in workdir so we can atomically set nowiteout xattr */
+	if (!ovl_dentry_is_whiteout(dentry) && !ovl_cattr_is_whiteout(attr))
 		err = ovl_create_upper(dentry, inode, attr);
 	else
 		err = ovl_create_over_whiteout(dentry, inode, attr);
@@ -669,10 +683,6 @@ static int ovl_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 static int ovl_mknod(struct mnt_idmap *idmap, struct inode *dir,
 		     struct dentry *dentry, umode_t mode, dev_t rdev)
 {
-	/* Don't allow creation of "whiteout" on overlay */
-	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
-		return -EPERM;
-
 	return ovl_create_object(dentry, mode, rdev, NULL);
 }
 
@@ -1219,7 +1229,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
 		}
 	} else {
 		if (!d_is_negative(newdentry)) {
-			if (!new_opaque || !ovl_is_whiteout(newdentry))
+			if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry))
 				goto out_dput;
 		} else {
 			if (flags & RENAME_EXCHANGE)
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 80391c687c2a..e90167789a13 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -251,7 +251,9 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
 		err = -EREMOTE;
 		goto out_err;
 	}
-	if (ovl_is_whiteout(this)) {
+	path.dentry = this;
+	path.mnt = d->mnt;
+	if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) {
 		d->stop = d->opaque = true;
 		goto put_and_out;
 	}
@@ -264,8 +266,6 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
 		goto put_and_out;
 	}
 
-	path.dentry = this;
-	path.mnt = d->mnt;
 	if (!d_can_lookup(this)) {
 		if (d->is_dir || !last_element) {
 			d->stop = true;
@@ -438,7 +438,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
 	else if (IS_ERR(origin))
 		return PTR_ERR(origin);
 
-	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
+	if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) &&
 	    inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
 		goto invalid;
 
@@ -1383,7 +1383,11 @@ bool ovl_lower_positive(struct dentry *dentry)
 				break;
 			}
 		} else {
-			positive = !ovl_is_whiteout(this);
+			struct path path = {
+				.dentry = this,
+				.mnt = parentpath->layer->mnt,
+			};
+			positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path);
 			done = true;
 			dput(this);
 		}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 311e1f37ce84..33d4c0011bb9 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -49,6 +49,7 @@ enum ovl_xattr {
 	OVL_XATTR_UUID,
 	OVL_XATTR_METACOPY,
 	OVL_XATTR_PROTATTR,
+	OVL_XATTR_NOWHITEOUT,
 };
 
 enum ovl_inode_flag {
@@ -469,16 +470,28 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
 void ovl_dir_modified(struct dentry *dentry, bool impurity);
 u64 ovl_inode_version_get(struct inode *inode);
 bool ovl_is_whiteout(struct dentry *dentry);
+bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path);
 struct file *ovl_path_open(const struct path *path, int flags);
 int ovl_copy_up_start(struct dentry *dentry, int flags);
 void ovl_copy_up_end(struct dentry *dentry);
 bool ovl_already_copied_up(struct dentry *dentry, int flags);
 bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
 			      enum ovl_xattr ox);
+bool ovl_path_check_nowhiteout_xattr(struct ovl_fs *ofs, const struct path *path);
 bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
 bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
 			 const struct path *upperpath);
 
+static inline bool ovl_upper_is_whiteout(struct ovl_fs *ofs,
+					 struct dentry *upperdentry)
+{
+	struct path upperpath = {
+		.dentry = upperdentry,
+		.mnt = ovl_upper_mnt(ofs),
+	};
+	return ovl_path_is_whiteout(ofs, &upperpath);
+}
+
 static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs,
 					  struct dentry *upperdentry)
 {
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index de39e067ae65..9cf8e7e2961c 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -280,7 +280,12 @@ static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data
 			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
 			dentry = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
 			if (!IS_ERR(dentry)) {
-				p->is_whiteout = ovl_is_whiteout(dentry);
+				struct path childpath = {
+					.dentry = dentry,
+					.mnt = path->mnt,
+				};
+				p->is_whiteout = ovl_path_is_whiteout(OVL_FS(rdd->dentry->d_sb),
+								      &childpath);
 				dput(dentry);
 			}
 		}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index a3be13306c73..995c21349bb9 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -579,7 +579,7 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
 	if (IS_ERR(whiteout))
 		goto cleanup_temp;
 
-	err = ovl_is_whiteout(whiteout);
+	err = ovl_upper_is_whiteout(ofs, whiteout);
 
 	/* Best effort cleanup of whiteout and temp file */
 	if (err)
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 0f387092450e..da6d2abf64dd 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -575,6 +575,16 @@ bool ovl_is_whiteout(struct dentry *dentry)
 	return inode && IS_WHITEOUT(inode);
 }
 
+/*
+ * Use this over ovl_is_whiteout for upper and lower files, as it also
+ * handles escaped whiteout files.
+ */
+bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path)
+{
+	return ovl_is_whiteout(path->dentry) &&
+		!ovl_path_check_nowhiteout_xattr(ofs, path);
+}
+
 struct file *ovl_path_open(const struct path *path, int flags)
 {
 	struct inode *inode = d_inode(path->dentry);
@@ -663,6 +673,14 @@ void ovl_copy_up_end(struct dentry *dentry)
 	ovl_inode_unlock(d_inode(dentry));
 }
 
+bool ovl_path_check_nowhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
+{
+	int res;
+
+	res = ovl_path_getxattr(ofs, path, OVL_XATTR_NOWHITEOUT, NULL, 0);
+	return res >= 0;
+}
+
 bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path)
 {
 	int res;
@@ -760,6 +778,7 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
 #define OVL_XATTR_UUID_POSTFIX		"uuid"
 #define OVL_XATTR_METACOPY_POSTFIX	"metacopy"
 #define OVL_XATTR_PROTATTR_POSTFIX	"protattr"
+#define OVL_XATTR_NOWHITEOUT_POSTFIX	"nowhiteout"
 
 #define OVL_XATTR_TAB_ENTRY(x) \
 	[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
@@ -775,6 +794,7 @@ const char *const ovl_xattr_table[][2] = {
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID),
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_NOWHITEOUT),
 };
 
 int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v2 5/5] ovl: Add documentation on nesting of overlayfs mounts
  2023-08-17 11:05 [PATCH v2 0/5] Support nested overlayfs mounts Alexander Larsson
                   ` (3 preceding siblings ...)
  2023-08-17 11:05 ` [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs Alexander Larsson
@ 2023-08-17 11:05 ` Alexander Larsson
  2023-08-17 13:31   ` Amir Goldstein
  2023-08-17 14:45 ` [PATCH v2 0/5] Support nested " Amir Goldstein
  5 siblings, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-17 11:05 UTC (permalink / raw)
  To: miklos; +Cc: linux-unionfs, amir73il, Alexander Larsson

Signed-off-by: Alexander Larsson <alexl@redhat.com>
---
 Documentation/filesystems/overlayfs.rst | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index 35853906accb..e38b2f5fadaf 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -492,6 +492,28 @@ directory tree on the same or different underlying filesystem, and even
 to a different machine.  With the "inodes index" feature, trying to mount
 the copied layers will fail the verification of the lower root file handle.
 
+Nesting overlayfs mounts
+------------------------
+
+It is possible to use a lower directory that is stored on an overlayfs
+mount. For regular files this does not need any special care. However, files
+that have overlayfs attributes, such as whiteouts or `overlay.*` xattrs will
+be interpreted by the underlying overlayfs mount and stripped out. In order to
+allow the second overlayfs mount to see the attributes they must be escaped.
+
+Overlayfs specific xattrs are escaped by using a special prefix of
+`overlay.overlay.`. So, a file with a `trusted.overlay.overlay.metacopy` xattr
+in the lower dir will be exposed as a regular file with a
+`trusted.overlay.metacopy` xattr in the overlayfs mount. This can be nested
+by repeating the prefix multiple time, as each instance only removes one
+prefix.
+
+Whiteouts files marked with a `overlay.nowhiteout` xattr will cause overlayfs
+not to treat them as a whiteout. Since this xattr is then stripped out, the
+next layer will instead apply the whiteout.
+
+Files created via overlayfs will automatically be created with the right
+escapes in the upper directory.
 
 Non-standard behavior
 ---------------------
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 1/5] ovl: Move xattr support to new xattrs.c file
  2023-08-17 11:05 ` [PATCH v2 1/5] ovl: Move xattr support to new xattrs.c file Alexander Larsson
@ 2023-08-17 12:48   ` Amir Goldstein
  0 siblings, 0 replies; 47+ messages in thread
From: Amir Goldstein @ 2023-08-17 12:48 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: miklos, linux-unionfs

On Thu, Aug 17, 2023 at 2:05 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> This moves the code from from super.c and inode.c, and makes
> ovl_xattr_get/set() static.
>
> This is in preparation for doing more work on xattrs support.
>
> Signed-off-by: Alexander Larsson <alexl@redhat.com>

Thanks for this cleanup!

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

> ---
>  fs/overlayfs/Makefile    |   2 +-
>  fs/overlayfs/inode.c     | 124 ------------------------
>  fs/overlayfs/overlayfs.h |  18 ++--
>  fs/overlayfs/super.c     |  65 +------------
>  fs/overlayfs/xattrs.c    | 198 +++++++++++++++++++++++++++++++++++++++
>  5 files changed, 209 insertions(+), 198 deletions(-)
>  create mode 100644 fs/overlayfs/xattrs.c
>
> diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
> index 4e173d56b11f..5648954f8588 100644
> --- a/fs/overlayfs/Makefile
> +++ b/fs/overlayfs/Makefile
> @@ -6,4 +6,4 @@
>  obj-$(CONFIG_OVERLAY_FS) += overlay.o
>
>  overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
> -               copy_up.o export.o params.o
> +               copy_up.o export.o params.o xattrs.o
> diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
> index b395cd84bfce..375edf832641 100644
> --- a/fs/overlayfs/inode.c
> +++ b/fs/overlayfs/inode.c
> @@ -339,130 +339,6 @@ static const char *ovl_get_link(struct dentry *dentry,
>         return p;
>  }
>
> -bool ovl_is_private_xattr(struct super_block *sb, const char *name)
> -{
> -       struct ovl_fs *ofs = OVL_FS(sb);
> -
> -       if (ofs->config.userxattr)
> -               return strncmp(name, OVL_XATTR_USER_PREFIX,
> -                              sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
> -       else
> -               return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
> -                              sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
> -}
> -
> -int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
> -                 const void *value, size_t size, int flags)
> -{
> -       int err;
> -       struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
> -       struct dentry *upperdentry = ovl_i_dentry_upper(inode);
> -       struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
> -       struct path realpath;
> -       const struct cred *old_cred;
> -
> -       err = ovl_want_write(dentry);
> -       if (err)
> -               goto out;
> -
> -       if (!value && !upperdentry) {
> -               ovl_path_lower(dentry, &realpath);
> -               old_cred = ovl_override_creds(dentry->d_sb);
> -               err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0);
> -               revert_creds(old_cred);
> -               if (err < 0)
> -                       goto out_drop_write;
> -       }
> -
> -       if (!upperdentry) {
> -               err = ovl_copy_up(dentry);
> -               if (err)
> -                       goto out_drop_write;
> -
> -               realdentry = ovl_dentry_upper(dentry);
> -       }
> -
> -       old_cred = ovl_override_creds(dentry->d_sb);
> -       if (value) {
> -               err = ovl_do_setxattr(ofs, realdentry, name, value, size,
> -                                     flags);
> -       } else {
> -               WARN_ON(flags != XATTR_REPLACE);
> -               err = ovl_do_removexattr(ofs, realdentry, name);
> -       }
> -       revert_creds(old_cred);
> -
> -       /* copy c/mtime */
> -       ovl_copyattr(inode);
> -
> -out_drop_write:
> -       ovl_drop_write(dentry);
> -out:
> -       return err;
> -}
> -
> -int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
> -                 void *value, size_t size)
> -{
> -       ssize_t res;
> -       const struct cred *old_cred;
> -       struct path realpath;
> -
> -       ovl_i_path_real(inode, &realpath);
> -       old_cred = ovl_override_creds(dentry->d_sb);
> -       res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size);
> -       revert_creds(old_cred);
> -       return res;
> -}
> -
> -static bool ovl_can_list(struct super_block *sb, const char *s)
> -{
> -       /* Never list private (.overlay) */
> -       if (ovl_is_private_xattr(sb, s))
> -               return false;
> -
> -       /* List all non-trusted xattrs */
> -       if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
> -               return true;
> -
> -       /* list other trusted for superuser only */
> -       return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
> -}
> -
> -ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
> -{
> -       struct dentry *realdentry = ovl_dentry_real(dentry);
> -       ssize_t res;
> -       size_t len;
> -       char *s;
> -       const struct cred *old_cred;
> -
> -       old_cred = ovl_override_creds(dentry->d_sb);
> -       res = vfs_listxattr(realdentry, list, size);
> -       revert_creds(old_cred);
> -       if (res <= 0 || size == 0)
> -               return res;
> -
> -       /* filter out private xattrs */
> -       for (s = list, len = res; len;) {
> -               size_t slen = strnlen(s, len) + 1;
> -
> -               /* underlying fs providing us with an broken xattr list? */
> -               if (WARN_ON(slen > len))
> -                       return -EIO;
> -
> -               len -= slen;
> -               if (!ovl_can_list(dentry->d_sb, s)) {
> -                       res -= slen;
> -                       memmove(s, s + slen, len);
> -               } else {
> -                       s += slen;
> -               }
> -       }
> -
> -       return res;
> -}
> -
>  #ifdef CONFIG_FS_POSIX_ACL
>  /*
>   * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 72f57d919aa9..1283b7126b94 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -684,17 +684,8 @@ int ovl_set_nlink_lower(struct dentry *dentry);
>  unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
>                            struct dentry *upperdentry,
>                            unsigned int fallback);
> -int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
> -               struct iattr *attr);
> -int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
> -               struct kstat *stat, u32 request_mask, unsigned int flags);
>  int ovl_permission(struct mnt_idmap *idmap, struct inode *inode,
>                    int mask);
> -int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
> -                 const void *value, size_t size, int flags);
> -int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
> -                 void *value, size_t size);
> -ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
>
>  #ifdef CONFIG_FS_POSIX_ACL
>  struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap,
> @@ -830,3 +821,12 @@ static inline bool ovl_force_readonly(struct ovl_fs *ofs)
>  {
>         return (!ovl_upper_mnt(ofs) || !ofs->workdir);
>  }
> +
> +/* xattr.c */
> +
> +const struct xattr_handler **ovl_xattr_handlers(struct ovl_fs *ofs);
> +int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
> +               struct iattr *attr);
> +int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
> +               struct kstat *stat, u32 request_mask, unsigned int flags);
> +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index def266b5e2a3..a3be13306c73 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -434,68 +434,6 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
>         return ok;
>  }
>
> -static int ovl_own_xattr_get(const struct xattr_handler *handler,
> -                            struct dentry *dentry, struct inode *inode,
> -                            const char *name, void *buffer, size_t size)
> -{
> -       return -EOPNOTSUPP;
> -}
> -
> -static int ovl_own_xattr_set(const struct xattr_handler *handler,
> -                            struct mnt_idmap *idmap,
> -                            struct dentry *dentry, struct inode *inode,
> -                            const char *name, const void *value,
> -                            size_t size, int flags)
> -{
> -       return -EOPNOTSUPP;
> -}
> -
> -static int ovl_other_xattr_get(const struct xattr_handler *handler,
> -                              struct dentry *dentry, struct inode *inode,
> -                              const char *name, void *buffer, size_t size)
> -{
> -       return ovl_xattr_get(dentry, inode, name, buffer, size);
> -}
> -
> -static int ovl_other_xattr_set(const struct xattr_handler *handler,
> -                              struct mnt_idmap *idmap,
> -                              struct dentry *dentry, struct inode *inode,
> -                              const char *name, const void *value,
> -                              size_t size, int flags)
> -{
> -       return ovl_xattr_set(dentry, inode, name, value, size, flags);
> -}
> -
> -static const struct xattr_handler ovl_own_trusted_xattr_handler = {
> -       .prefix = OVL_XATTR_TRUSTED_PREFIX,
> -       .get = ovl_own_xattr_get,
> -       .set = ovl_own_xattr_set,
> -};
> -
> -static const struct xattr_handler ovl_own_user_xattr_handler = {
> -       .prefix = OVL_XATTR_USER_PREFIX,
> -       .get = ovl_own_xattr_get,
> -       .set = ovl_own_xattr_set,
> -};
> -
> -static const struct xattr_handler ovl_other_xattr_handler = {
> -       .prefix = "", /* catch all */
> -       .get = ovl_other_xattr_get,
> -       .set = ovl_other_xattr_set,
> -};
> -
> -static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
> -       &ovl_own_trusted_xattr_handler,
> -       &ovl_other_xattr_handler,
> -       NULL
> -};
> -
> -static const struct xattr_handler *ovl_user_xattr_handlers[] = {
> -       &ovl_own_user_xattr_handler,
> -       &ovl_other_xattr_handler,
> -       NULL
> -};
> -
>  static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
>                           struct inode **ptrap, const char *name)
>  {
> @@ -1478,8 +1416,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
>         cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
>
>         sb->s_magic = OVERLAYFS_SUPER_MAGIC;
> -       sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
> -               ovl_trusted_xattr_handlers;
> +       sb->s_xattr = ovl_xattr_handlers(ofs);
>         sb->s_fs_info = ofs;
>         sb->s_flags |= SB_POSIXACL;
>         sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE;
> diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c
> new file mode 100644
> index 000000000000..edc7cc49a7c4
> --- /dev/null
> +++ b/fs/overlayfs/xattrs.c
> @@ -0,0 +1,198 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <linux/fs.h>
> +#include <linux/xattr.h>
> +#include "overlayfs.h"
> +
> +bool ovl_is_private_xattr(struct super_block *sb, const char *name)
> +{
> +       struct ovl_fs *ofs = OVL_FS(sb);
> +
> +       if (ofs->config.userxattr)
> +               return strncmp(name, OVL_XATTR_USER_PREFIX,
> +                              sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
> +       else
> +               return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
> +                              sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
> +}
> +
> +static int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
> +                        const void *value, size_t size, int flags)
> +{
> +       int err;
> +       struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
> +       struct dentry *upperdentry = ovl_i_dentry_upper(inode);
> +       struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
> +       struct path realpath;
> +       const struct cred *old_cred;
> +
> +       err = ovl_want_write(dentry);
> +       if (err)
> +               goto out;
> +
> +       if (!value && !upperdentry) {
> +               ovl_path_lower(dentry, &realpath);
> +               old_cred = ovl_override_creds(dentry->d_sb);
> +               err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0);
> +               revert_creds(old_cred);
> +               if (err < 0)
> +                       goto out_drop_write;
> +       }
> +
> +       if (!upperdentry) {
> +               err = ovl_copy_up(dentry);
> +               if (err)
> +                       goto out_drop_write;
> +
> +               realdentry = ovl_dentry_upper(dentry);
> +       }
> +
> +       old_cred = ovl_override_creds(dentry->d_sb);
> +       if (value) {
> +               err = ovl_do_setxattr(ofs, realdentry, name, value, size,
> +                                     flags);
> +       } else {
> +               WARN_ON(flags != XATTR_REPLACE);
> +               err = ovl_do_removexattr(ofs, realdentry, name);
> +       }
> +       revert_creds(old_cred);
> +
> +       /* copy c/mtime */
> +       ovl_copyattr(inode);
> +
> +out_drop_write:
> +       ovl_drop_write(dentry);
> +out:
> +       return err;
> +}
> +
> +static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
> +                        void *value, size_t size)
> +{
> +       ssize_t res;
> +       const struct cred *old_cred;
> +       struct path realpath;
> +
> +       ovl_i_path_real(inode, &realpath);
> +       old_cred = ovl_override_creds(dentry->d_sb);
> +       res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size);
> +       revert_creds(old_cred);
> +       return res;
> +}
> +
> +static bool ovl_can_list(struct super_block *sb, const char *s)
> +{
> +       /* Never list private (.overlay) */
> +       if (ovl_is_private_xattr(sb, s))
> +               return false;
> +
> +       /* List all non-trusted xattrs */
> +       if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
> +               return true;
> +
> +       /* list other trusted for superuser only */
> +       return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
> +}
> +
> +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
> +{
> +       struct dentry *realdentry = ovl_dentry_real(dentry);
> +       ssize_t res;
> +       size_t len;
> +       char *s;
> +       const struct cred *old_cred;
> +
> +       old_cred = ovl_override_creds(dentry->d_sb);
> +       res = vfs_listxattr(realdentry, list, size);
> +       revert_creds(old_cred);
> +       if (res <= 0 || size == 0)
> +               return res;
> +
> +       /* filter out private xattrs */
> +       for (s = list, len = res; len;) {
> +               size_t slen = strnlen(s, len) + 1;
> +
> +               /* underlying fs providing us with an broken xattr list? */
> +               if (WARN_ON(slen > len))
> +                       return -EIO;
> +
> +               len -= slen;
> +               if (!ovl_can_list(dentry->d_sb, s)) {
> +                       res -= slen;
> +                       memmove(s, s + slen, len);
> +               } else {
> +                       s += slen;
> +               }
> +       }
> +
> +       return res;
> +}
> +
> +static int ovl_own_xattr_get(const struct xattr_handler *handler,
> +                            struct dentry *dentry, struct inode *inode,
> +                            const char *name, void *buffer, size_t size)
> +{
> +       return -EOPNOTSUPP;
> +}
> +
> +static int ovl_own_xattr_set(const struct xattr_handler *handler,
> +                            struct mnt_idmap *idmap,
> +                            struct dentry *dentry, struct inode *inode,
> +                            const char *name, const void *value,
> +                            size_t size, int flags)
> +{
> +       return -EOPNOTSUPP;
> +}
> +
> +static int ovl_other_xattr_get(const struct xattr_handler *handler,
> +                              struct dentry *dentry, struct inode *inode,
> +                              const char *name, void *buffer, size_t size)
> +{
> +       return ovl_xattr_get(dentry, inode, name, buffer, size);
> +}
> +
> +static int ovl_other_xattr_set(const struct xattr_handler *handler,
> +                              struct mnt_idmap *idmap,
> +                              struct dentry *dentry, struct inode *inode,
> +                              const char *name, const void *value,
> +                              size_t size, int flags)
> +{
> +       return ovl_xattr_set(dentry, inode, name, value, size, flags);
> +}
> +
> +static const struct xattr_handler ovl_own_trusted_xattr_handler = {
> +       .prefix = OVL_XATTR_TRUSTED_PREFIX,
> +       .get = ovl_own_xattr_get,
> +       .set = ovl_own_xattr_set,
> +};
> +
> +static const struct xattr_handler ovl_own_user_xattr_handler = {
> +       .prefix = OVL_XATTR_USER_PREFIX,
> +       .get = ovl_own_xattr_get,
> +       .set = ovl_own_xattr_set,
> +};
> +
> +static const struct xattr_handler ovl_other_xattr_handler = {
> +       .prefix = "", /* catch all */
> +       .get = ovl_other_xattr_get,
> +       .set = ovl_other_xattr_set,
> +};
> +
> +static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
> +       &ovl_own_trusted_xattr_handler,
> +       &ovl_other_xattr_handler,
> +       NULL
> +};
> +
> +static const struct xattr_handler *ovl_user_xattr_handlers[] = {
> +       &ovl_own_user_xattr_handler,
> +       &ovl_other_xattr_handler,
> +       NULL
> +};
> +
> +const struct xattr_handler **ovl_xattr_handlers(struct ovl_fs *ofs)
> +{
> +       return ofs->config.userxattr ? ovl_user_xattr_handlers :
> +               ovl_trusted_xattr_handlers;
> +}
> +
> --
> 2.41.0
>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 2/5] ovl: Add OVL_XATTR_TRUSTED/USER_PREFIX_LEN macros
  2023-08-17 11:05 ` [PATCH v2 2/5] ovl: Add OVL_XATTR_TRUSTED/USER_PREFIX_LEN macros Alexander Larsson
@ 2023-08-17 12:48   ` Amir Goldstein
  0 siblings, 0 replies; 47+ messages in thread
From: Amir Goldstein @ 2023-08-17 12:48 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: miklos, linux-unionfs

On Thu, Aug 17, 2023 at 2:05 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> These match the ones for e.g. XATTR_TRUSTED_PREFIX_LEN.
>
> Signed-off-by: Alexander Larsson <alexl@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>

> ---
>  fs/overlayfs/overlayfs.h | 2 ++
>  fs/overlayfs/xattrs.c    | 4 ++--
>  2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 1283b7126b94..ef993a543b2a 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -28,7 +28,9 @@ enum ovl_path_type {
>
>  #define OVL_XATTR_NAMESPACE "overlay."
>  #define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
> +#define OVL_XATTR_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1)
>  #define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
> +#define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1)
>
>  enum ovl_xattr {
>         OVL_XATTR_OPAQUE,
> diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c
> index edc7cc49a7c4..b8ea96606ea8 100644
> --- a/fs/overlayfs/xattrs.c
> +++ b/fs/overlayfs/xattrs.c
> @@ -10,10 +10,10 @@ bool ovl_is_private_xattr(struct super_block *sb, const char *name)
>
>         if (ofs->config.userxattr)
>                 return strncmp(name, OVL_XATTR_USER_PREFIX,
> -                              sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
> +                              OVL_XATTR_USER_PREFIX_LEN) == 0;
>         else
>                 return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
> -                              sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
> +                              OVL_XATTR_TRUSTED_PREFIX_LEN) == 0;
>  }
>
>  static int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
> --
> 2.41.0
>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 5/5] ovl: Add documentation on nesting of overlayfs mounts
  2023-08-17 11:05 ` [PATCH v2 5/5] ovl: Add documentation on nesting of overlayfs mounts Alexander Larsson
@ 2023-08-17 13:31   ` Amir Goldstein
  2023-08-17 13:59     ` Alexander Larsson
  0 siblings, 1 reply; 47+ messages in thread
From: Amir Goldstein @ 2023-08-17 13:31 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: miklos, linux-unionfs

On Thu, Aug 17, 2023 at 2:05 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> Signed-off-by: Alexander Larsson <alexl@redhat.com>
> ---
>  Documentation/filesystems/overlayfs.rst | 22 ++++++++++++++++++++++
>  1 file changed, 22 insertions(+)
>
> diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
> index 35853906accb..e38b2f5fadaf 100644
> --- a/Documentation/filesystems/overlayfs.rst
> +++ b/Documentation/filesystems/overlayfs.rst
> @@ -492,6 +492,28 @@ directory tree on the same or different underlying filesystem, and even
>  to a different machine.  With the "inodes index" feature, trying to mount
>  the copied layers will fail the verification of the lower root file handle.
>
> +Nesting overlayfs mounts
> +------------------------
> +
> +It is possible to use a lower directory that is stored on an overlayfs
> +mount. For regular files this does not need any special care. However, files
> +that have overlayfs attributes, such as whiteouts or `overlay.*` xattrs will
> +be interpreted by the underlying overlayfs mount and stripped out. In order to
> +allow the second overlayfs mount to see the attributes they must be escaped.
> +
> +Overlayfs specific xattrs are escaped by using a special prefix of
> +`overlay.overlay.`. So, a file with a `trusted.overlay.overlay.metacopy` xattr
> +in the lower dir will be exposed as a regular file with a
> +`trusted.overlay.metacopy` xattr in the overlayfs mount. This can be nested
> +by repeating the prefix multiple time, as each instance only removes one
> +prefix.
> +
> +Whiteouts files marked with a `overlay.nowhiteout` xattr will cause overlayfs
> +not to treat them as a whiteout. Since this xattr is then stripped out, the
> +next layer will instead apply the whiteout.
> +
> +Files created via overlayfs will automatically be created with the right
> +escapes in the upper directory.
>

I was wondering why you used `` around xattr names.
Is that intentional? I am not an expert in rst format, but
other places in this doc use "" around xattr names and it
does not look like the formatting of `` is handled well by github at least:
https://github.com/alexlarsson/linux/blob/ovl-nesting/Documentation/filesystems/overlayfs.rst

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-17 11:05 ` [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs Alexander Larsson
@ 2023-08-17 13:40   ` Amir Goldstein
  2023-08-21 10:59   ` Miklos Szeredi
  1 sibling, 0 replies; 47+ messages in thread
From: Amir Goldstein @ 2023-08-17 13:40 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: miklos, linux-unionfs

On Thu, Aug 17, 2023 at 2:05 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> This is needed to properly stack overlay filesystems, I.E, being able
> to create a whiteout file on an overlay mount and then use that as
> part of the lowerdir in another overlay mount.
>
> The way this works is that we create a regular whiteout, but set the
> `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> whiteout we check this xattr and don't treat it as a whiteout if it is
> set. The xattr itself is then stripped and when viewed as part of the
> overlayfs mount it looks like a regular whiteout.
>
> In order to make the creation of the whiteout file with xattr atomic
> we always take ovl_create_over_whiteout() codepath when creating
> whiteouts.
>
> Signed-off-by: Alexander Larsson <alexl@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>

> ---
>  fs/overlayfs/dir.c       | 24 +++++++++++++++++-------
>  fs/overlayfs/namei.c     | 14 +++++++++-----
>  fs/overlayfs/overlayfs.h | 13 +++++++++++++
>  fs/overlayfs/readdir.c   |  7 ++++++-
>  fs/overlayfs/super.c     |  2 +-
>  fs/overlayfs/util.c      | 20 ++++++++++++++++++++
>  6 files changed, 66 insertions(+), 14 deletions(-)
>
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index 033fc0458a3d..2f3442f5430d 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -442,6 +442,11 @@ static int ovl_set_upper_acl(struct ovl_fs *ofs, struct dentry *upperdentry,
>         return ovl_do_set_acl(ofs, upperdentry, acl_name, acl);
>  }
>
> +static bool ovl_cattr_is_whiteout(struct ovl_cattr *attr)
> +{
> +       return S_ISCHR(attr->mode) && attr->rdev == WHITEOUT_DEV;
> +}
> +
>  static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
>                                     struct ovl_cattr *cattr)
>  {
> @@ -477,7 +482,8 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
>                 goto out_unlock;
>
>         err = -ESTALE;
> -       if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
> +       if (!ovl_cattr_is_whiteout(cattr) &&
> +           (d_is_negative(upper) || !ovl_upper_is_whiteout(ofs, upper)))
>                 goto out_dput;
>
>         newdentry = ovl_create_temp(ofs, workdir, cattr);
> @@ -485,6 +491,13 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
>         if (IS_ERR(newdentry))
>                 goto out_dput;
>
> +       if (ovl_cattr_is_whiteout(cattr)) {
> +               err = ovl_setxattr(ofs, newdentry, OVL_XATTR_NOWHITEOUT,
> +                                  NULL, 0);
> +               if (err < 0)
> +                       goto out_cleanup;
> +       }
> +
>         /*
>          * mode could have been mutilated due to umask (e.g. sgid directory)
>          */
> @@ -606,7 +619,8 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
>                 put_cred(override_cred);
>         }
>
> -       if (!ovl_dentry_is_whiteout(dentry))
> +       /* Create whiteouts in workdir so we can atomically set nowiteout xattr */
> +       if (!ovl_dentry_is_whiteout(dentry) && !ovl_cattr_is_whiteout(attr))
>                 err = ovl_create_upper(dentry, inode, attr);
>         else
>                 err = ovl_create_over_whiteout(dentry, inode, attr);
> @@ -669,10 +683,6 @@ static int ovl_mkdir(struct mnt_idmap *idmap, struct inode *dir,
>  static int ovl_mknod(struct mnt_idmap *idmap, struct inode *dir,
>                      struct dentry *dentry, umode_t mode, dev_t rdev)
>  {
> -       /* Don't allow creation of "whiteout" on overlay */
> -       if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
> -               return -EPERM;
> -
>         return ovl_create_object(dentry, mode, rdev, NULL);
>  }
>
> @@ -1219,7 +1229,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
>                 }
>         } else {
>                 if (!d_is_negative(newdentry)) {
> -                       if (!new_opaque || !ovl_is_whiteout(newdentry))
> +                       if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry))
>                                 goto out_dput;
>                 } else {
>                         if (flags & RENAME_EXCHANGE)
> diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
> index 80391c687c2a..e90167789a13 100644
> --- a/fs/overlayfs/namei.c
> +++ b/fs/overlayfs/namei.c
> @@ -251,7 +251,9 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
>                 err = -EREMOTE;
>                 goto out_err;
>         }
> -       if (ovl_is_whiteout(this)) {
> +       path.dentry = this;
> +       path.mnt = d->mnt;
> +       if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) {
>                 d->stop = d->opaque = true;
>                 goto put_and_out;
>         }
> @@ -264,8 +266,6 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
>                 goto put_and_out;
>         }
>
> -       path.dentry = this;
> -       path.mnt = d->mnt;
>         if (!d_can_lookup(this)) {
>                 if (d->is_dir || !last_element) {
>                         d->stop = true;
> @@ -438,7 +438,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
>         else if (IS_ERR(origin))
>                 return PTR_ERR(origin);
>
> -       if (upperdentry && !ovl_is_whiteout(upperdentry) &&
> +       if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) &&
>             inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
>                 goto invalid;
>
> @@ -1383,7 +1383,11 @@ bool ovl_lower_positive(struct dentry *dentry)
>                                 break;
>                         }
>                 } else {
> -                       positive = !ovl_is_whiteout(this);
> +                       struct path path = {
> +                               .dentry = this,
> +                               .mnt = parentpath->layer->mnt,
> +                       };
> +                       positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path);
>                         done = true;
>                         dput(this);
>                 }
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 311e1f37ce84..33d4c0011bb9 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -49,6 +49,7 @@ enum ovl_xattr {
>         OVL_XATTR_UUID,
>         OVL_XATTR_METACOPY,
>         OVL_XATTR_PROTATTR,
> +       OVL_XATTR_NOWHITEOUT,
>  };
>
>  enum ovl_inode_flag {
> @@ -469,16 +470,28 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
>  void ovl_dir_modified(struct dentry *dentry, bool impurity);
>  u64 ovl_inode_version_get(struct inode *inode);
>  bool ovl_is_whiteout(struct dentry *dentry);
> +bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path);
>  struct file *ovl_path_open(const struct path *path, int flags);
>  int ovl_copy_up_start(struct dentry *dentry, int flags);
>  void ovl_copy_up_end(struct dentry *dentry);
>  bool ovl_already_copied_up(struct dentry *dentry, int flags);
>  bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
>                               enum ovl_xattr ox);
> +bool ovl_path_check_nowhiteout_xattr(struct ovl_fs *ofs, const struct path *path);
>  bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
>  bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
>                          const struct path *upperpath);
>
> +static inline bool ovl_upper_is_whiteout(struct ovl_fs *ofs,
> +                                        struct dentry *upperdentry)
> +{
> +       struct path upperpath = {
> +               .dentry = upperdentry,
> +               .mnt = ovl_upper_mnt(ofs),
> +       };
> +       return ovl_path_is_whiteout(ofs, &upperpath);
> +}
> +
>  static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs,
>                                           struct dentry *upperdentry)
>  {
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index de39e067ae65..9cf8e7e2961c 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -280,7 +280,12 @@ static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data
>                         rdd->first_maybe_whiteout = p->next_maybe_whiteout;
>                         dentry = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
>                         if (!IS_ERR(dentry)) {
> -                               p->is_whiteout = ovl_is_whiteout(dentry);
> +                               struct path childpath = {
> +                                       .dentry = dentry,
> +                                       .mnt = path->mnt,
> +                               };
> +                               p->is_whiteout = ovl_path_is_whiteout(OVL_FS(rdd->dentry->d_sb),
> +                                                                     &childpath);
>                                 dput(dentry);
>                         }
>                 }
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index a3be13306c73..995c21349bb9 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -579,7 +579,7 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
>         if (IS_ERR(whiteout))
>                 goto cleanup_temp;
>
> -       err = ovl_is_whiteout(whiteout);
> +       err = ovl_upper_is_whiteout(ofs, whiteout);
>
>         /* Best effort cleanup of whiteout and temp file */
>         if (err)
> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> index 0f387092450e..da6d2abf64dd 100644
> --- a/fs/overlayfs/util.c
> +++ b/fs/overlayfs/util.c
> @@ -575,6 +575,16 @@ bool ovl_is_whiteout(struct dentry *dentry)
>         return inode && IS_WHITEOUT(inode);
>  }
>
> +/*
> + * Use this over ovl_is_whiteout for upper and lower files, as it also
> + * handles escaped whiteout files.
> + */
> +bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path)
> +{
> +       return ovl_is_whiteout(path->dentry) &&
> +               !ovl_path_check_nowhiteout_xattr(ofs, path);
> +}
> +
>  struct file *ovl_path_open(const struct path *path, int flags)
>  {
>         struct inode *inode = d_inode(path->dentry);
> @@ -663,6 +673,14 @@ void ovl_copy_up_end(struct dentry *dentry)
>         ovl_inode_unlock(d_inode(dentry));
>  }
>
> +bool ovl_path_check_nowhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
> +{
> +       int res;
> +
> +       res = ovl_path_getxattr(ofs, path, OVL_XATTR_NOWHITEOUT, NULL, 0);
> +       return res >= 0;
> +}
> +
>  bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path)
>  {
>         int res;
> @@ -760,6 +778,7 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
>  #define OVL_XATTR_UUID_POSTFIX         "uuid"
>  #define OVL_XATTR_METACOPY_POSTFIX     "metacopy"
>  #define OVL_XATTR_PROTATTR_POSTFIX     "protattr"
> +#define OVL_XATTR_NOWHITEOUT_POSTFIX   "nowhiteout"
>
>  #define OVL_XATTR_TAB_ENTRY(x) \
>         [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
> @@ -775,6 +794,7 @@ const char *const ovl_xattr_table[][2] = {
>         OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID),
>         OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
>         OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
> +       OVL_XATTR_TAB_ENTRY(OVL_XATTR_NOWHITEOUT),
>  };
>
>  int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
> --
> 2.41.0
>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 5/5] ovl: Add documentation on nesting of overlayfs mounts
  2023-08-17 13:31   ` Amir Goldstein
@ 2023-08-17 13:59     ` Alexander Larsson
  0 siblings, 0 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-17 13:59 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: miklos, linux-unionfs

On Thu, Aug 17, 2023 at 3:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Thu, Aug 17, 2023 at 2:05 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > Signed-off-by: Alexander Larsson <alexl@redhat.com>
> > ---
> >  Documentation/filesystems/overlayfs.rst | 22 ++++++++++++++++++++++
> >  1 file changed, 22 insertions(+)
> >
> > diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
> > index 35853906accb..e38b2f5fadaf 100644
> > --- a/Documentation/filesystems/overlayfs.rst
> > +++ b/Documentation/filesystems/overlayfs.rst
> > @@ -492,6 +492,28 @@ directory tree on the same or different underlying filesystem, and even
> >  to a different machine.  With the "inodes index" feature, trying to mount
> >  the copied layers will fail the verification of the lower root file handle.
> >
> > +Nesting overlayfs mounts
> > +------------------------
> > +
> > +It is possible to use a lower directory that is stored on an overlayfs
> > +mount. For regular files this does not need any special care. However, files
> > +that have overlayfs attributes, such as whiteouts or `overlay.*` xattrs will
> > +be interpreted by the underlying overlayfs mount and stripped out. In order to
> > +allow the second overlayfs mount to see the attributes they must be escaped.
> > +
> > +Overlayfs specific xattrs are escaped by using a special prefix of
> > +`overlay.overlay.`. So, a file with a `trusted.overlay.overlay.metacopy` xattr
> > +in the lower dir will be exposed as a regular file with a
> > +`trusted.overlay.metacopy` xattr in the overlayfs mount. This can be nested
> > +by repeating the prefix multiple time, as each instance only removes one
> > +prefix.
> > +
> > +Whiteouts files marked with a `overlay.nowhiteout` xattr will cause overlayfs
> > +not to treat them as a whiteout. Since this xattr is then stripped out, the
> > +next layer will instead apply the whiteout.
> > +
> > +Files created via overlayfs will automatically be created with the right
> > +escapes in the upper directory.
> >
>
> I was wondering why you used `` around xattr names.
> Is that intentional? I am not an expert in rst format, but
> other places in this doc use "" around xattr names and it
> does not look like the formatting of `` is handled well by github at least:
> https://github.com/alexlarsson/linux/blob/ovl-nesting/Documentation/filesystems/overlayfs.rst

I guess I'm just too used to markdown. Will change to ".

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 3/5] ovl: Support escaped overlay.* xattrs
  2023-08-17 11:05 ` [PATCH v2 3/5] ovl: Support escaped overlay.* xattrs Alexander Larsson
@ 2023-08-17 14:31   ` Amir Goldstein
  2023-08-17 15:00     ` Alexander Larsson
  0 siblings, 1 reply; 47+ messages in thread
From: Amir Goldstein @ 2023-08-17 14:31 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: miklos, linux-unionfs

On Thu, Aug 17, 2023 at 2:05 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> There are cases where you want to use an overlayfs mount as a lowerdir
> for another overlayfs mount. For example, if the system rootfs is on
> overlayfs due to composefs, or to make it volatile (via tmps), then
> you cannot currently store a lowerdir on the rootfs. This means you
> can't e.g. store on the rootfs a prepared container image for use
> using overlayfs.
>
> To work around this, we introduce an escapment mechanism for overlayfs
> xattrs. Whenever the lower/upper dir has a xattr named
> `overlay.overlay.XYZ`, we list it as overlay.XYZ in listxattrs, and
> when the user calls getxattr or setxattr on `overlay.XYZ`, we apply to
> `overlay.overlay.XYZ` in the backing directories.
>
> This allows storing any kind of overlay xattrs in a overlayfs mount
> that can be used as a lowerdir in another mount. It is possible to
> stack this mechanism multiple times, such that
> overlay.overlay.overlay.XYZ will survive two levels of overlay mounts,
> however this is not all that useful in practice because of stack depth
> limitations of overlayfs mounts.
>

Please elaborate what happens when lower overlayfs is trusted.overlay
and nested overlayfs is user.overlay or the other way around.
Is it true that this patch would not be needed at all if this was the case?

In any case, I think that this case would not be uncommon, so it is worth
adding it in the tests.

Thanks,
Amir.

> Signed-off-by: Alexander Larsson <alexl@redhat.com>
> ---
>  fs/overlayfs/overlayfs.h |  7 ++++
>  fs/overlayfs/xattrs.c    | 78 +++++++++++++++++++++++++++++++++++++---
>  2 files changed, 81 insertions(+), 4 deletions(-)
>
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index ef993a543b2a..311e1f37ce84 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -32,6 +32,13 @@ enum ovl_path_type {
>  #define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
>  #define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1)
>
> +#define OVL_XATTR_ESCAPE_PREFIX OVL_XATTR_NAMESPACE
> +#define OVL_XATTR_ESCAPE_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_PREFIX) - 1)
> +#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ESCAPE_PREFIX
> +#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_TRUSTED_PREFIX) - 1)
> +#define OVL_XATTR_ESCAPE_USER_PREFIX OVL_XATTR_USER_PREFIX OVL_XATTR_ESCAPE_PREFIX
> +#define OVL_XATTR_ESCAPE_USER_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_USER_PREFIX) - 1)
> +
>  enum ovl_xattr {
>         OVL_XATTR_OPAQUE,
>         OVL_XATTR_REDIRECT,
> diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c
> index b8ea96606ea8..27b31f812eb1 100644
> --- a/fs/overlayfs/xattrs.c
> +++ b/fs/overlayfs/xattrs.c
> @@ -4,6 +4,18 @@
>  #include <linux/xattr.h>
>  #include "overlayfs.h"
>
> +bool ovl_is_escaped_xattr(struct super_block *sb, const char *name)
> +{
> +       struct ovl_fs *ofs = sb->s_fs_info;
> +
> +       if (ofs->config.userxattr)
> +               return strncmp(name, OVL_XATTR_ESCAPE_USER_PREFIX,
> +                              OVL_XATTR_ESCAPE_USER_PREFIX_LEN) == 0;
> +       else
> +               return strncmp(name, OVL_XATTR_ESCAPE_TRUSTED_PREFIX,
> +                              OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN - 1) == 0;
> +}
> +
>  bool ovl_is_private_xattr(struct super_block *sb, const char *name)
>  {
>         struct ovl_fs *ofs = OVL_FS(sb);
> @@ -82,8 +94,8 @@ static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char
>
>  static bool ovl_can_list(struct super_block *sb, const char *s)
>  {
> -       /* Never list private (.overlay) */
> -       if (ovl_is_private_xattr(sb, s))
> +       /* Never list non-escaped private (.overlay) */
> +       if (ovl_is_private_xattr(sb, s) && !ovl_is_escaped_xattr(sb, s))
>                 return false;
>
>         /* List all non-trusted xattrs */
> @@ -97,10 +109,12 @@ static bool ovl_can_list(struct super_block *sb, const char *s)
>  ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
>  {
>         struct dentry *realdentry = ovl_dentry_real(dentry);
> +       struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
>         ssize_t res;
>         size_t len;
>         char *s;
>         const struct cred *old_cred;
> +       size_t prefix_len;
>
>         old_cred = ovl_override_creds(dentry->d_sb);
>         res = vfs_listxattr(realdentry, list, size);
> @@ -108,6 +122,9 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
>         if (res <= 0 || size == 0)
>                 return res;
>
> +       prefix_len = ofs->config.userxattr ?
> +               OVL_XATTR_USER_PREFIX_LEN : OVL_XATTR_TRUSTED_PREFIX_LEN;
> +
>         /* filter out private xattrs */
>         for (s = list, len = res; len;) {
>                 size_t slen = strnlen(s, len) + 1;
> @@ -120,6 +137,12 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
>                 if (!ovl_can_list(dentry->d_sb, s)) {
>                         res -= slen;
>                         memmove(s, s + slen, len);
> +               } else if (ovl_is_escaped_xattr(dentry->d_sb, s)) {
> +                       memmove(s + prefix_len,
> +                               s + prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN,
> +                               slen - (prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN) + len);
> +                       res -= OVL_XATTR_ESCAPE_PREFIX_LEN;
> +                       s += slen - OVL_XATTR_ESCAPE_PREFIX_LEN;
>                 } else {
>                         s += slen;
>                 }
> @@ -128,11 +151,47 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
>         return res;
>  }
>
> +static char *ovl_xattr_escape_name(const char *prefix, const char *name)
> +{
> +       size_t prefix_len = strlen(prefix);
> +       size_t name_len = strlen(name);
> +       size_t escaped_len;
> +       char *escaped, *s;
> +
> +       escaped_len = prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN + name_len;
> +       if (escaped_len > XATTR_NAME_MAX)
> +               return ERR_PTR(-EOPNOTSUPP);
> +
> +       escaped = kmalloc(escaped_len + 1, GFP_KERNEL);
> +       if (escaped == NULL)
> +               return ERR_PTR(-ENOMEM);
> +
> +       s = escaped;
> +       memcpy(s, prefix, prefix_len);
> +       s += prefix_len;
> +       memcpy(s, OVL_XATTR_ESCAPE_PREFIX, OVL_XATTR_ESCAPE_PREFIX_LEN);
> +       s += OVL_XATTR_ESCAPE_PREFIX_LEN;
> +       memcpy(s, name, name_len + 1);
> +
> +       return escaped;
> +}
> +
>  static int ovl_own_xattr_get(const struct xattr_handler *handler,
>                              struct dentry *dentry, struct inode *inode,
>                              const char *name, void *buffer, size_t size)
>  {
> -       return -EOPNOTSUPP;
> +       char *escaped;
> +       int r;
> +
> +       escaped = ovl_xattr_escape_name(handler->prefix, name);
> +       if (IS_ERR(escaped))
> +               return PTR_ERR(escaped);
> +
> +       r = ovl_xattr_get(dentry, inode, escaped, buffer, size);
> +
> +       kfree(escaped);
> +
> +       return r;
>  }
>
>  static int ovl_own_xattr_set(const struct xattr_handler *handler,
> @@ -141,7 +200,18 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler,
>                              const char *name, const void *value,
>                              size_t size, int flags)
>  {
> -       return -EOPNOTSUPP;
> +       char *escaped;
> +       int r;
> +
> +       escaped = ovl_xattr_escape_name(handler->prefix, name);
> +       if (IS_ERR(escaped))
> +               return PTR_ERR(escaped);
> +
> +       r = ovl_xattr_set(dentry, inode, escaped, value, size, flags);
> +
> +       kfree(escaped);
> +
> +       return r;
>  }
>
>  static int ovl_other_xattr_get(const struct xattr_handler *handler,
> --
> 2.41.0
>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 0/5] Support nested overlayfs mounts
  2023-08-17 11:05 [PATCH v2 0/5] Support nested overlayfs mounts Alexander Larsson
                   ` (4 preceding siblings ...)
  2023-08-17 11:05 ` [PATCH v2 5/5] ovl: Add documentation on nesting of overlayfs mounts Alexander Larsson
@ 2023-08-17 14:45 ` Amir Goldstein
  5 siblings, 0 replies; 47+ messages in thread
From: Amir Goldstein @ 2023-08-17 14:45 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: miklos, linux-unionfs

On Thu, Aug 17, 2023 at 2:05 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> There are cases where you want to use an overlayfs mount as a lowerdir for
> another overlayfs mount. For example, if the system rootfs is on overlayfs due
> to composefs, or to make it volatile (via tmps), then you cannot currently store
> a lowerdir on the rootfs, becasue the inner overlayfs will eat all the whiteouts
> and overlay xattrs. This means you can't e.g. store on the rootfs a prepared
> container image for use using overlayfs.
>
> This patch series adds support for nesting of overlayfs mounts by escaping the
> problematic features on and unescaping them when exposing to the overlayfs user.
>
> This series is also available here:
>   https://github.com/alexlarsson/linux/tree/ovl-nesting
>
> And xfstest to test it is available here:
>   https://github.com/alexlarsson/xfstests/tree/overlayfs-nesting
>

Hi Alex,

Technically, the patches look pretty sane to me.
I'll need Miklos to weight in on the review as well and anyway
I think we should get verity merged (to 6.6) before considering this
(sort of) follow up.

I'd add some more tests as I commented on github including
mixing user and trusted xattr use case.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 3/5] ovl: Support escaped overlay.* xattrs
  2023-08-17 14:31   ` Amir Goldstein
@ 2023-08-17 15:00     ` Alexander Larsson
  0 siblings, 0 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-17 15:00 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: miklos, linux-unionfs

On Thu, Aug 17, 2023 at 4:32 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Thu, Aug 17, 2023 at 2:05 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > There are cases where you want to use an overlayfs mount as a lowerdir
> > for another overlayfs mount. For example, if the system rootfs is on
> > overlayfs due to composefs, or to make it volatile (via tmps), then
> > you cannot currently store a lowerdir on the rootfs. This means you
> > can't e.g. store on the rootfs a prepared container image for use
> > using overlayfs.
> >
> > To work around this, we introduce an escapment mechanism for overlayfs
> > xattrs. Whenever the lower/upper dir has a xattr named
> > `overlay.overlay.XYZ`, we list it as overlay.XYZ in listxattrs, and
> > when the user calls getxattr or setxattr on `overlay.XYZ`, we apply to
> > `overlay.overlay.XYZ` in the backing directories.
> >
> > This allows storing any kind of overlay xattrs in a overlayfs mount
> > that can be used as a lowerdir in another mount. It is possible to
> > stack this mechanism multiple times, such that
> > overlay.overlay.overlay.XYZ will survive two levels of overlay mounts,
> > however this is not all that useful in practice because of stack depth
> > limitations of overlayfs mounts.
> >
>
> Please elaborate what happens when lower overlayfs is trusted.overlay
> and nested overlayfs is user.overlay or the other way around.
> Is it true that this patch would not be needed at all if this was the case?

For the xattrs it isn't really needed. For example, if the first
overlayfs mount is uses trusted.overlay mount and it has a directory
with a file that has a user.overlay xattr, then this file will be
correctly exposed in the first mount, and it will be usable by a
nested userxattr mount.

However, suppose the 2nd mount (the userxattr one) needs a whiteout
between its layers. It this is a plain whiteout file it will be
consumed by the first mount. For that to work you need the
trusted.overlay.nowhiteout xattr on the file. Same for the other way
around.

> In any case, I think that this case would not be uncommon, so it is worth
> adding it in the tests.

Yeah. Will look at adding this.

> Thanks,
> Amir.
>
> > Signed-off-by: Alexander Larsson <alexl@redhat.com>
> > ---
> >  fs/overlayfs/overlayfs.h |  7 ++++
> >  fs/overlayfs/xattrs.c    | 78 +++++++++++++++++++++++++++++++++++++---
> >  2 files changed, 81 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> > index ef993a543b2a..311e1f37ce84 100644
> > --- a/fs/overlayfs/overlayfs.h
> > +++ b/fs/overlayfs/overlayfs.h
> > @@ -32,6 +32,13 @@ enum ovl_path_type {
> >  #define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
> >  #define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1)
> >
> > +#define OVL_XATTR_ESCAPE_PREFIX OVL_XATTR_NAMESPACE
> > +#define OVL_XATTR_ESCAPE_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_PREFIX) - 1)
> > +#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ESCAPE_PREFIX
> > +#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_TRUSTED_PREFIX) - 1)
> > +#define OVL_XATTR_ESCAPE_USER_PREFIX OVL_XATTR_USER_PREFIX OVL_XATTR_ESCAPE_PREFIX
> > +#define OVL_XATTR_ESCAPE_USER_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_USER_PREFIX) - 1)
> > +
> >  enum ovl_xattr {
> >         OVL_XATTR_OPAQUE,
> >         OVL_XATTR_REDIRECT,
> > diff --git a/fs/overlayfs/xattrs.c b/fs/overlayfs/xattrs.c
> > index b8ea96606ea8..27b31f812eb1 100644
> > --- a/fs/overlayfs/xattrs.c
> > +++ b/fs/overlayfs/xattrs.c
> > @@ -4,6 +4,18 @@
> >  #include <linux/xattr.h>
> >  #include "overlayfs.h"
> >
> > +bool ovl_is_escaped_xattr(struct super_block *sb, const char *name)
> > +{
> > +       struct ovl_fs *ofs = sb->s_fs_info;
> > +
> > +       if (ofs->config.userxattr)
> > +               return strncmp(name, OVL_XATTR_ESCAPE_USER_PREFIX,
> > +                              OVL_XATTR_ESCAPE_USER_PREFIX_LEN) == 0;
> > +       else
> > +               return strncmp(name, OVL_XATTR_ESCAPE_TRUSTED_PREFIX,
> > +                              OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN - 1) == 0;
> > +}
> > +
> >  bool ovl_is_private_xattr(struct super_block *sb, const char *name)
> >  {
> >         struct ovl_fs *ofs = OVL_FS(sb);
> > @@ -82,8 +94,8 @@ static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char
> >
> >  static bool ovl_can_list(struct super_block *sb, const char *s)
> >  {
> > -       /* Never list private (.overlay) */
> > -       if (ovl_is_private_xattr(sb, s))
> > +       /* Never list non-escaped private (.overlay) */
> > +       if (ovl_is_private_xattr(sb, s) && !ovl_is_escaped_xattr(sb, s))
> >                 return false;
> >
> >         /* List all non-trusted xattrs */
> > @@ -97,10 +109,12 @@ static bool ovl_can_list(struct super_block *sb, const char *s)
> >  ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
> >  {
> >         struct dentry *realdentry = ovl_dentry_real(dentry);
> > +       struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
> >         ssize_t res;
> >         size_t len;
> >         char *s;
> >         const struct cred *old_cred;
> > +       size_t prefix_len;
> >
> >         old_cred = ovl_override_creds(dentry->d_sb);
> >         res = vfs_listxattr(realdentry, list, size);
> > @@ -108,6 +122,9 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
> >         if (res <= 0 || size == 0)
> >                 return res;
> >
> > +       prefix_len = ofs->config.userxattr ?
> > +               OVL_XATTR_USER_PREFIX_LEN : OVL_XATTR_TRUSTED_PREFIX_LEN;
> > +
> >         /* filter out private xattrs */
> >         for (s = list, len = res; len;) {
> >                 size_t slen = strnlen(s, len) + 1;
> > @@ -120,6 +137,12 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
> >                 if (!ovl_can_list(dentry->d_sb, s)) {
> >                         res -= slen;
> >                         memmove(s, s + slen, len);
> > +               } else if (ovl_is_escaped_xattr(dentry->d_sb, s)) {
> > +                       memmove(s + prefix_len,
> > +                               s + prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN,
> > +                               slen - (prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN) + len);
> > +                       res -= OVL_XATTR_ESCAPE_PREFIX_LEN;
> > +                       s += slen - OVL_XATTR_ESCAPE_PREFIX_LEN;
> >                 } else {
> >                         s += slen;
> >                 }
> > @@ -128,11 +151,47 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
> >         return res;
> >  }
> >
> > +static char *ovl_xattr_escape_name(const char *prefix, const char *name)
> > +{
> > +       size_t prefix_len = strlen(prefix);
> > +       size_t name_len = strlen(name);
> > +       size_t escaped_len;
> > +       char *escaped, *s;
> > +
> > +       escaped_len = prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN + name_len;
> > +       if (escaped_len > XATTR_NAME_MAX)
> > +               return ERR_PTR(-EOPNOTSUPP);
> > +
> > +       escaped = kmalloc(escaped_len + 1, GFP_KERNEL);
> > +       if (escaped == NULL)
> > +               return ERR_PTR(-ENOMEM);
> > +
> > +       s = escaped;
> > +       memcpy(s, prefix, prefix_len);
> > +       s += prefix_len;
> > +       memcpy(s, OVL_XATTR_ESCAPE_PREFIX, OVL_XATTR_ESCAPE_PREFIX_LEN);
> > +       s += OVL_XATTR_ESCAPE_PREFIX_LEN;
> > +       memcpy(s, name, name_len + 1);
> > +
> > +       return escaped;
> > +}
> > +
> >  static int ovl_own_xattr_get(const struct xattr_handler *handler,
> >                              struct dentry *dentry, struct inode *inode,
> >                              const char *name, void *buffer, size_t size)
> >  {
> > -       return -EOPNOTSUPP;
> > +       char *escaped;
> > +       int r;
> > +
> > +       escaped = ovl_xattr_escape_name(handler->prefix, name);
> > +       if (IS_ERR(escaped))
> > +               return PTR_ERR(escaped);
> > +
> > +       r = ovl_xattr_get(dentry, inode, escaped, buffer, size);
> > +
> > +       kfree(escaped);
> > +
> > +       return r;
> >  }
> >
> >  static int ovl_own_xattr_set(const struct xattr_handler *handler,
> > @@ -141,7 +200,18 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler,
> >                              const char *name, const void *value,
> >                              size_t size, int flags)
> >  {
> > -       return -EOPNOTSUPP;
> > +       char *escaped;
> > +       int r;
> > +
> > +       escaped = ovl_xattr_escape_name(handler->prefix, name);
> > +       if (IS_ERR(escaped))
> > +               return PTR_ERR(escaped);
> > +
> > +       r = ovl_xattr_set(dentry, inode, escaped, value, size, flags);
> > +
> > +       kfree(escaped);
> > +
> > +       return r;
> >  }
> >
> >  static int ovl_other_xattr_get(const struct xattr_handler *handler,
> > --
> > 2.41.0
> >
>


-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-17 11:05 ` [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs Alexander Larsson
  2023-08-17 13:40   ` Amir Goldstein
@ 2023-08-21 10:59   ` Miklos Szeredi
  2023-08-21 12:55     ` Alexander Larsson
  2023-08-22 13:22     ` Alexander Larsson
  1 sibling, 2 replies; 47+ messages in thread
From: Miklos Szeredi @ 2023-08-21 10:59 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: linux-unionfs, amir73il

On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
>
> This is needed to properly stack overlay filesystems, I.E, being able
> to create a whiteout file on an overlay mount and then use that as
> part of the lowerdir in another overlay mount.
>
> The way this works is that we create a regular whiteout, but set the
> `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> whiteout we check this xattr and don't treat it as a whiteout if it is
> set. The xattr itself is then stripped and when viewed as part of the
> overlayfs mount it looks like a regular whiteout.
>

I understand the motivation, but don't have good feelings about the
implementation.  Like the xattr escaping this should also have the
property that when fed to an old kernel version, it shouldn't
interpret this object as a whiteout.  Whether it remains hidden like
the escaped xattrs or if it shows up as something else is
uninteresting.

It could just be a zero sized regular file with "overlay.whiteout".

But we are also getting to the stage where the number of getxattr
queries on lookup could be a performance problem.  Or maybe not.  It
would be good to look at this aspect as well when adding xattr queries
to lookup.


Thanks,
Millos

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-21 10:59   ` Miklos Szeredi
@ 2023-08-21 12:55     ` Alexander Larsson
  2023-08-21 13:25       ` Amir Goldstein
  2023-08-22 13:22     ` Alexander Larsson
  1 sibling, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-21 12:55 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-unionfs, amir73il

On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> >
> > This is needed to properly stack overlay filesystems, I.E, being able
> > to create a whiteout file on an overlay mount and then use that as
> > part of the lowerdir in another overlay mount.
> >
> > The way this works is that we create a regular whiteout, but set the
> > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > whiteout we check this xattr and don't treat it as a whiteout if it is
> > set. The xattr itself is then stripped and when viewed as part of the
> > overlayfs mount it looks like a regular whiteout.
> >
>
> I understand the motivation, but don't have good feelings about the
> implementation.  Like the xattr escaping this should also have the
> property that when fed to an old kernel version, it shouldn't
> interpret this object as a whiteout.  Whether it remains hidden like
> the escaped xattrs or if it shows up as something else is
> uninteresting.
>
> It could just be a zero sized regular file with "overlay.whiteout".
>
> But we are also getting to the stage where the number of getxattr
> queries on lookup could be a performance problem.  Or maybe not.  It
> would be good to look at this aspect as well when adding xattr queries
> to lookup.

Wanting to avoid (as much as possible) the reading of more xattrs
which would affect performance of every regular file was the reason
for this particular implementation. I will do some more thinking and
see if I can come up with an alternative approach.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-21 12:55     ` Alexander Larsson
@ 2023-08-21 13:25       ` Amir Goldstein
  2023-08-21 15:31         ` Alexander Larsson
  0 siblings, 1 reply; 47+ messages in thread
From: Amir Goldstein @ 2023-08-21 13:25 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs

On Mon, Aug 21, 2023 at 3:55 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> >
> > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > This is needed to properly stack overlay filesystems, I.E, being able
> > > to create a whiteout file on an overlay mount and then use that as
> > > part of the lowerdir in another overlay mount.
> > >
> > > The way this works is that we create a regular whiteout, but set the
> > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > set. The xattr itself is then stripped and when viewed as part of the
> > > overlayfs mount it looks like a regular whiteout.
> > >
> >
> > I understand the motivation, but don't have good feelings about the
> > implementation.  Like the xattr escaping this should also have the
> > property that when fed to an old kernel version, it shouldn't
> > interpret this object as a whiteout.  Whether it remains hidden like
> > the escaped xattrs or if it shows up as something else is
> > uninteresting.
> >
> > It could just be a zero sized regular file with "overlay.whiteout".
> >
> > But we are also getting to the stage where the number of getxattr
> > queries on lookup could be a performance problem.  Or maybe not.  It
> > would be good to look at this aspect as well when adding xattr queries
> > to lookup.
>
> Wanting to avoid (as much as possible) the reading of more xattrs
> which would affect performance of every regular file was the reason
> for this particular implementation. I will do some more thinking and
> see if I can come up with an alternative approach.

I'd just like to add that, although the cost of getxattr is mostly fs
dependent, from my experience, the cost of several getxattr on one
file are usually amortized in the cost of the first getxattr on that file.

So while it is valuable to avoid any getxattr if possible, avoid an
extra getxattr is often less critical.
Again, this statement may not be accurate for all fs.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-21 13:25       ` Amir Goldstein
@ 2023-08-21 15:31         ` Alexander Larsson
  2023-08-21 15:40           ` Miklos Szeredi
  0 siblings, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-21 15:31 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, linux-unionfs

On Mon, Aug 21, 2023 at 3:26 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Mon, Aug 21, 2023 at 3:55 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > >
> > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > >
> > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > to create a whiteout file on an overlay mount and then use that as
> > > > part of the lowerdir in another overlay mount.
> > > >
> > > > The way this works is that we create a regular whiteout, but set the
> > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > overlayfs mount it looks like a regular whiteout.
> > > >
> > >
> > > I understand the motivation, but don't have good feelings about the
> > > implementation.  Like the xattr escaping this should also have the
> > > property that when fed to an old kernel version, it shouldn't
> > > interpret this object as a whiteout.  Whether it remains hidden like
> > > the escaped xattrs or if it shows up as something else is
> > > uninteresting.
> > >
> > > It could just be a zero sized regular file with "overlay.whiteout".
> > >
> > > But we are also getting to the stage where the number of getxattr
> > > queries on lookup could be a performance problem.  Or maybe not.  It
> > > would be good to look at this aspect as well when adding xattr queries
> > > to lookup.
> >
> > Wanting to avoid (as much as possible) the reading of more xattrs
> > which would affect performance of every regular file was the reason
> > for this particular implementation. I will do some more thinking and
> > see if I can come up with an alternative approach.
>
> I'd just like to add that, although the cost of getxattr is mostly fs
> dependent, from my experience, the cost of several getxattr on one
> file are usually amortized in the cost of the first getxattr on that file.
>
> So while it is valuable to avoid any getxattr if possible, avoid an
> extra getxattr is often less critical.
> Again, this statement may not be accurate for all fs.

I dunno. Each time I look at the xattr codepaths they seem very long,
and there is no vfs-side caching. Each getxattr() will invoke the
entire stack of reading and (re)parsing the metadata blocks needed for
xattr info. However, cpus are fast, and typically the blocks
themselves are cached in the buffer cache, so maybe this doesn't
necessarily matter.

To check this I ran some quick performance tests. A single, cached
(i.e. repeated), missed (e.g. ENODATA) getxattr call on xfs and ext4
takes about 1-1.5 microseconds. This is about the same timescale as a
stat call, so pretty fast I guess (and probably even faster when the
kernel does it, as there is no syscall overhead). However, on e.g. a
fuse mount (I used the passthrough_ll example) it takes about 20 usec
because each call must be roundtripped via the fuse daemon. Similarly
I can imagine a network filesystem may be slow here, although I see
that NFS4 has an xattr cache.

So, I guess the end result is that it's probably ok to use an extra
getxattr here, and that fuse should probably grow an xattr cache.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-21 15:31         ` Alexander Larsson
@ 2023-08-21 15:40           ` Miklos Szeredi
  2023-08-21 20:07             ` Alexander Larsson
  0 siblings, 1 reply; 47+ messages in thread
From: Miklos Szeredi @ 2023-08-21 15:40 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Amir Goldstein, linux-unionfs

On Mon, 21 Aug 2023 at 17:31, Alexander Larsson <alexl@redhat.com> wrote:

> So, I guess the end result is that it's probably ok to use an extra
> getxattr here, and that fuse should probably grow an xattr cache.

Having an xattr cache in the page cache would be wonderful, but it's
not how normal filesystems work and I have no idea how to get there.
I should probably talk to Matthew.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-21 15:40           ` Miklos Szeredi
@ 2023-08-21 20:07             ` Alexander Larsson
  0 siblings, 0 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-21 20:07 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: Amir Goldstein, linux-unionfs

On Mon, Aug 21, 2023 at 5:48 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Mon, 21 Aug 2023 at 17:31, Alexander Larsson <alexl@redhat.com> wrote:
>
> > So, I guess the end result is that it's probably ok to use an extra
> > getxattr here, and that fuse should probably grow an xattr cache.
>
> Having an xattr cache in the page cache would be wonderful, but it's
> not how normal filesystems work and I have no idea how to get there.
> I should probably talk to Matthew.

The bloom filter stuff that erofs is adding for this is cool. You
could do something like that generically to at least catch negative
xattr lookups. From a single listxattrs you could build such a 32bit
in-memory bloom filter for later xattr lookups. And a fuse daemon
could pre-seed the filter for an inode to avoid roundtrips.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-21 10:59   ` Miklos Szeredi
  2023-08-21 12:55     ` Alexander Larsson
@ 2023-08-22 13:22     ` Alexander Larsson
  2023-08-22 13:56       ` Miklos Szeredi
  1 sibling, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-22 13:22 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-unionfs, amir73il

On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> >
> > This is needed to properly stack overlay filesystems, I.E, being able
> > to create a whiteout file on an overlay mount and then use that as
> > part of the lowerdir in another overlay mount.
> >
> > The way this works is that we create a regular whiteout, but set the
> > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > whiteout we check this xattr and don't treat it as a whiteout if it is
> > set. The xattr itself is then stripped and when viewed as part of the
> > overlayfs mount it looks like a regular whiteout.
> >
>
> I understand the motivation, but don't have good feelings about the
> implementation.  Like the xattr escaping this should also have the
> property that when fed to an old kernel version, it shouldn't
> interpret this object as a whiteout.  Whether it remains hidden like
> the escaped xattrs or if it shows up as something else is
> uninteresting.
>
> It could just be a zero sized regular file with "overlay.whiteout".

So, I started doing this, where a whiteout is just a regular file with
the xattr set. Initially I thought I only needed to check the xattr
during lookup and convert the inode mode from S_IFREG to S_IFCHR.
However, I also need to hook up readdir and convert DT_REG to DT_CHR,
otherwise readdir will report the wrong d_type. To make it worse,
overlayfs itself looks for DT_CHR to handle whiteouts when listing
files, so nesting is not working without that.

The only way I see to implement that conversion is to call getxattr()
on every DT_REG file during readdir(), and while a single getxattr()
on lookup is fine, I don't think that is.

Any other ideas?

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 13:22     ` Alexander Larsson
@ 2023-08-22 13:56       ` Miklos Szeredi
  2023-08-22 14:25         ` Alexander Larsson
  0 siblings, 1 reply; 47+ messages in thread
From: Miklos Szeredi @ 2023-08-22 13:56 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: linux-unionfs, amir73il

On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
>
> On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> >
> > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > This is needed to properly stack overlay filesystems, I.E, being able
> > > to create a whiteout file on an overlay mount and then use that as
> > > part of the lowerdir in another overlay mount.
> > >
> > > The way this works is that we create a regular whiteout, but set the
> > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > set. The xattr itself is then stripped and when viewed as part of the
> > > overlayfs mount it looks like a regular whiteout.
> > >
> >
> > I understand the motivation, but don't have good feelings about the
> > implementation.  Like the xattr escaping this should also have the
> > property that when fed to an old kernel version, it shouldn't
> > interpret this object as a whiteout.  Whether it remains hidden like
> > the escaped xattrs or if it shows up as something else is
> > uninteresting.
> >
> > It could just be a zero sized regular file with "overlay.whiteout".
>
> So, I started doing this, where a whiteout is just a regular file with
> the xattr set. Initially I thought I only needed to check the xattr
> during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> otherwise readdir will report the wrong d_type. To make it worse,
> overlayfs itself looks for DT_CHR to handle whiteouts when listing
> files, so nesting is not working without that.
>
> The only way I see to implement that conversion is to call getxattr()
> on every DT_REG file during readdir(), and while a single getxattr()
> on lookup is fine, I don't think that is.
>
> Any other ideas?

Not messing with d_type seems a good idea.   How about a random
unreserved chardev?

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 13:56       ` Miklos Szeredi
@ 2023-08-22 14:25         ` Alexander Larsson
  2023-08-22 14:36           ` Alexander Larsson
  0 siblings, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-22 14:25 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-unionfs, amir73il

On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > >
> > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > >
> > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > to create a whiteout file on an overlay mount and then use that as
> > > > part of the lowerdir in another overlay mount.
> > > >
> > > > The way this works is that we create a regular whiteout, but set the
> > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > overlayfs mount it looks like a regular whiteout.
> > > >
> > >
> > > I understand the motivation, but don't have good feelings about the
> > > implementation.  Like the xattr escaping this should also have the
> > > property that when fed to an old kernel version, it shouldn't
> > > interpret this object as a whiteout.  Whether it remains hidden like
> > > the escaped xattrs or if it shows up as something else is
> > > uninteresting.
> > >
> > > It could just be a zero sized regular file with "overlay.whiteout".
> >
> > So, I started doing this, where a whiteout is just a regular file with
> > the xattr set. Initially I thought I only needed to check the xattr
> > during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> > However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> > otherwise readdir will report the wrong d_type. To make it worse,
> > overlayfs itself looks for DT_CHR to handle whiteouts when listing
> > files, so nesting is not working without that.
> >
> > The only way I see to implement that conversion is to call getxattr()
> > on every DT_REG file during readdir(), and while a single getxattr()
> > on lookup is fine, I don't think that is.
> >
> > Any other ideas?
>
> Not messing with d_type seems a good idea.   How about a random
> unreserved chardev?

Only the whiteout one (0,0) can be created by non-root users.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 14:25         ` Alexander Larsson
@ 2023-08-22 14:36           ` Alexander Larsson
  2023-08-22 15:02             ` Miklos Szeredi
  2023-08-22 15:30             ` Amir Goldstein
  0 siblings, 2 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-22 14:36 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-unionfs, amir73il

On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> >
> > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > >
> > > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > > >
> > > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > > to create a whiteout file on an overlay mount and then use that as
> > > > > part of the lowerdir in another overlay mount.
> > > > >
> > > > > The way this works is that we create a regular whiteout, but set the
> > > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > > overlayfs mount it looks like a regular whiteout.
> > > > >
> > > >
> > > > I understand the motivation, but don't have good feelings about the
> > > > implementation.  Like the xattr escaping this should also have the
> > > > property that when fed to an old kernel version, it shouldn't
> > > > interpret this object as a whiteout.  Whether it remains hidden like
> > > > the escaped xattrs or if it shows up as something else is
> > > > uninteresting.
> > > >
> > > > It could just be a zero sized regular file with "overlay.whiteout".
> > >
> > > So, I started doing this, where a whiteout is just a regular file with
> > > the xattr set. Initially I thought I only needed to check the xattr
> > > during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> > > However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> > > otherwise readdir will report the wrong d_type. To make it worse,
> > > overlayfs itself looks for DT_CHR to handle whiteouts when listing
> > > files, so nesting is not working without that.
> > >
> > > The only way I see to implement that conversion is to call getxattr()
> > > on every DT_REG file during readdir(), and while a single getxattr()
> > > on lookup is fine, I don't think that is.
> > >
> > > Any other ideas?
> >
> > Not messing with d_type seems a good idea.   How about a random
> > unreserved chardev?
>
> Only the whiteout one (0,0) can be created by non-root users.

I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
can't store xattrs on such files.
-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 14:36           ` Alexander Larsson
@ 2023-08-22 15:02             ` Miklos Szeredi
  2023-08-22 15:31               ` Alexander Larsson
  2023-08-22 15:30             ` Amir Goldstein
  1 sibling, 1 reply; 47+ messages in thread
From: Miklos Szeredi @ 2023-08-22 15:02 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: linux-unionfs, amir73il

On Tue, 22 Aug 2023 at 16:36, Alexander Larsson <alexl@redhat.com> wrote:
>
> On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > >
> > > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:

> > > > The only way I see to implement that conversion is to call getxattr()
> > > > on every DT_REG file during readdir(), and while a single getxattr()
> > > > on lookup is fine, I don't think that is.
> > > >
> > > > Any other ideas?
> > >
> > > Not messing with d_type seems a good idea.   How about a random
> > > unreserved chardev?
> >
> > Only the whiteout one (0,0) can be created by non-root users.
>
> I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
> can't store xattrs on such files.

The "user." xattr namespace is for regular files and directories only.
And "trusted." is privileged, obviously.

At this point I'm not sure what are your requirements.  Does creating
escaped whiteout need to be unprivleged?  If so, how did the
"user.overlay.nowhiteout" work?

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 14:36           ` Alexander Larsson
  2023-08-22 15:02             ` Miklos Szeredi
@ 2023-08-22 15:30             ` Amir Goldstein
  2023-08-22 15:43               ` Alexander Larsson
  2023-08-23 13:13               ` Alexander Larsson
  1 sibling, 2 replies; 47+ messages in thread
From: Amir Goldstein @ 2023-08-22 15:30 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs

On Tue, Aug 22, 2023 at 5:36 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > >
> > > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
> > > >
> > > > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > >
> > > > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > >
> > > > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > > > to create a whiteout file on an overlay mount and then use that as
> > > > > > part of the lowerdir in another overlay mount.
> > > > > >
> > > > > > The way this works is that we create a regular whiteout, but set the
> > > > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > > > overlayfs mount it looks like a regular whiteout.
> > > > > >
> > > > >
> > > > > I understand the motivation, but don't have good feelings about the
> > > > > implementation.  Like the xattr escaping this should also have the
> > > > > property that when fed to an old kernel version, it shouldn't
> > > > > interpret this object as a whiteout.  Whether it remains hidden like
> > > > > the escaped xattrs or if it shows up as something else is
> > > > > uninteresting.
> > > > >
> > > > > It could just be a zero sized regular file with "overlay.whiteout".
> > > >
> > > > So, I started doing this, where a whiteout is just a regular file with
> > > > the xattr set. Initially I thought I only needed to check the xattr
> > > > during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> > > > However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> > > > otherwise readdir will report the wrong d_type. To make it worse,
> > > > overlayfs itself looks for DT_CHR to handle whiteouts when listing
> > > > files, so nesting is not working without that.
> > > >
> > > > The only way I see to implement that conversion is to call getxattr()
> > > > on every DT_REG file during readdir(), and while a single getxattr()
> > > > on lookup is fine, I don't think that is.
> > > >
> > > > Any other ideas?
> > >
> > > Not messing with d_type seems a good idea.   How about a random
> > > unreserved chardev?
> >
> > Only the whiteout one (0,0) can be created by non-root users.
>
> I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
> can't store xattrs on such files.

FWIW, there is also DT_WHT that was defined and never used.
But that is just an anecdote.

Regarding the issue of avoiding getxattr for every dirent.
Note that in readdir, dirent that goes through ovl_cache_update_ino()
calls lookup()/stat() on the overlay itself, so as long as ovl_lookup()
will treat overlay.whiteout file as a whiteout, the code
                 /* Mark a stale entry */
                p->is_whiteout = true;
will kick in and do the right thing for readdir wrt cleaning up
lower entries covered with whiteouts, regardless of DT_CHR.

Now all that is left is to make sure that ovl_cache_update_ino()
is called if there are possibly overlay.whiteout files.

For the case of nested ovl, upper and lower fs cannot be the same,
so ovl_same_fs() is false.
Therefore, as long as xino is enabled (this is the default),
ovl_same_dev() is true => ovl_xino_bits() > 0 =>
ovl_calc_d_ino() is true and ovl_cache_update_ino() will be
called for all merged dirents.

IOW, unless I am missing something, if you implement overlay.whiteout
logic in ovl_lookup() correctly, readdir should "just work" as long as
the mounter does not explicitly use -o xino=off.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 15:02             ` Miklos Szeredi
@ 2023-08-22 15:31               ` Alexander Larsson
  0 siblings, 0 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-22 15:31 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-unionfs, amir73il

On Tue, Aug 22, 2023 at 5:03 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Tue, 22 Aug 2023 at 16:36, Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > >
> > > > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
>
> > > > > The only way I see to implement that conversion is to call getxattr()
> > > > > on every DT_REG file during readdir(), and while a single getxattr()
> > > > > on lookup is fine, I don't think that is.
> > > > >
> > > > > Any other ideas?
> > > >
> > > > Not messing with d_type seems a good idea.   How about a random
> > > > unreserved chardev?
> > >
> > > Only the whiteout one (0,0) can be created by non-root users.
> >
> > I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
> > can't store xattrs on such files.
>
> The "user." xattr namespace is for regular files and directories only.
> And "trusted." is privileged, obviously.

Ah, so for the trusted case using a DT_FIFO with a xattr could work.

> At this point I'm not sure what are your requirements.  Does creating
> escaped whiteout need to be unprivleged?  If so, how did the
> "user.overlay.nowhiteout" work?

I guess I didn't test the userxattrs version of whiteouts. Need to add
that to the xfstest changes.

So, it seems that it just isn't possible to support nesting whiteouts
with userxattrs in an unprivileged way.
Lets just focus on privileged use then. My personal primary goals lean
towards privileged use anyway, because composefs uses loopback erofs
mounts which require privileges anyway.

It feels sort of dangerous using a "real" char device node. What if
some driver happens to use that major/minor. Then we would not allow
using that device node in overlayfs, and it may cause a program to
unexpectedly open the device and talk to the driver. Otoh, not having
to mess with d_type is nice.

Alternatively we could use DT_FIFO files with a
trusted.overlay.whiteout. Then we can handle whiteouts for DT_FIFO in
the readdir code like how regular whiteouts work (i.e. something like
the maybe_whiteout list). On the assumption that both fifos and
whiteouts are not super common, this should probably work pretty well.

Opinions?

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 15:30             ` Amir Goldstein
@ 2023-08-22 15:43               ` Alexander Larsson
  2023-08-22 15:57                 ` Amir Goldstein
  2023-08-23 13:13               ` Alexander Larsson
  1 sibling, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-22 15:43 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, linux-unionfs

On Tue, Aug 22, 2023 at 5:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Aug 22, 2023 at 5:36 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > >
> > > > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
> > > > >
> > > > > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > > >
> > > > > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > > >
> > > > > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > > > > to create a whiteout file on an overlay mount and then use that as
> > > > > > > part of the lowerdir in another overlay mount.
> > > > > > >
> > > > > > > The way this works is that we create a regular whiteout, but set the
> > > > > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > > > > overlayfs mount it looks like a regular whiteout.
> > > > > > >
> > > > > >
> > > > > > I understand the motivation, but don't have good feelings about the
> > > > > > implementation.  Like the xattr escaping this should also have the
> > > > > > property that when fed to an old kernel version, it shouldn't
> > > > > > interpret this object as a whiteout.  Whether it remains hidden like
> > > > > > the escaped xattrs or if it shows up as something else is
> > > > > > uninteresting.
> > > > > >
> > > > > > It could just be a zero sized regular file with "overlay.whiteout".
> > > > >
> > > > > So, I started doing this, where a whiteout is just a regular file with
> > > > > the xattr set. Initially I thought I only needed to check the xattr
> > > > > during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> > > > > However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> > > > > otherwise readdir will report the wrong d_type. To make it worse,
> > > > > overlayfs itself looks for DT_CHR to handle whiteouts when listing
> > > > > files, so nesting is not working without that.
> > > > >
> > > > > The only way I see to implement that conversion is to call getxattr()
> > > > > on every DT_REG file during readdir(), and while a single getxattr()
> > > > > on lookup is fine, I don't think that is.
> > > > >
> > > > > Any other ideas?
> > > >
> > > > Not messing with d_type seems a good idea.   How about a random
> > > > unreserved chardev?
> > >
> > > Only the whiteout one (0,0) can be created by non-root users.
> >
> > I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
> > can't store xattrs on such files.
>
> FWIW, there is also DT_WHT that was defined and never used.
> But that is just an anecdote.
>
> Regarding the issue of avoiding getxattr for every dirent.
> Note that in readdir, dirent that goes through ovl_cache_update_ino()
> calls lookup()/stat() on the overlay itself, so as long as ovl_lookup()
> will treat overlay.whiteout file as a whiteout, the code
>                  /* Mark a stale entry */
>                 p->is_whiteout = true;
> will kick in and do the right thing for readdir wrt cleaning up
> lower entries covered with whiteouts, regardless of DT_CHR.

We don't want to treat this file as a whiteout though. We want it to
be exposed as a regular file that looks like a whiteout marker file
(i.e. char dev 0,0). Or am I missing something?


-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 15:43               ` Alexander Larsson
@ 2023-08-22 15:57                 ` Amir Goldstein
  2023-08-22 16:18                   ` Miklos Szeredi
  2023-08-23  8:51                   ` Alexander Larsson
  0 siblings, 2 replies; 47+ messages in thread
From: Amir Goldstein @ 2023-08-22 15:57 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs

On Tue, Aug 22, 2023 at 6:43 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Tue, Aug 22, 2023 at 5:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 5:36 PM Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
> > > >
> > > > On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > >
> > > > > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > >
> > > > > > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > > > >
> > > > > > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > > > >
> > > > > > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > > > > > to create a whiteout file on an overlay mount and then use that as
> > > > > > > > part of the lowerdir in another overlay mount.
> > > > > > > >
> > > > > > > > The way this works is that we create a regular whiteout, but set the
> > > > > > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > > > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > > > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > > > > > overlayfs mount it looks like a regular whiteout.
> > > > > > > >
> > > > > > >
> > > > > > > I understand the motivation, but don't have good feelings about the
> > > > > > > implementation.  Like the xattr escaping this should also have the
> > > > > > > property that when fed to an old kernel version, it shouldn't
> > > > > > > interpret this object as a whiteout.  Whether it remains hidden like
> > > > > > > the escaped xattrs or if it shows up as something else is
> > > > > > > uninteresting.
> > > > > > >
> > > > > > > It could just be a zero sized regular file with "overlay.whiteout".
> > > > > >
> > > > > > So, I started doing this, where a whiteout is just a regular file with
> > > > > > the xattr set. Initially I thought I only needed to check the xattr
> > > > > > during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> > > > > > However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> > > > > > otherwise readdir will report the wrong d_type. To make it worse,
> > > > > > overlayfs itself looks for DT_CHR to handle whiteouts when listing
> > > > > > files, so nesting is not working without that.
> > > > > >
> > > > > > The only way I see to implement that conversion is to call getxattr()
> > > > > > on every DT_REG file during readdir(), and while a single getxattr()
> > > > > > on lookup is fine, I don't think that is.
> > > > > >
> > > > > > Any other ideas?
> > > > >
> > > > > Not messing with d_type seems a good idea.   How about a random
> > > > > unreserved chardev?
> > > >
> > > > Only the whiteout one (0,0) can be created by non-root users.
> > >
> > > I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
> > > can't store xattrs on such files.
> >
> > FWIW, there is also DT_WHT that was defined and never used.
> > But that is just an anecdote.
> >
> > Regarding the issue of avoiding getxattr for every dirent.
> > Note that in readdir, dirent that goes through ovl_cache_update_ino()
> > calls lookup()/stat() on the overlay itself, so as long as ovl_lookup()
> > will treat overlay.whiteout file as a whiteout, the code
> >                  /* Mark a stale entry */
> >                 p->is_whiteout = true;
> > will kick in and do the right thing for readdir wrt cleaning up
> > lower entries covered with whiteouts, regardless of DT_CHR.
>
> We don't want to treat this file as a whiteout though. We want it to
> be exposed as a regular file that looks like a whiteout marker file
> (i.e. char dev 0,0). Or am I missing something?
>

Not sure if you really need to emulate chardev(0,0) at all.

Suppose that you just define a new way to express a whiteout -
an empty regular file with xattr overlay.whiteout.

Now you could use either chardev(0,0) or overlay.whiteout
to compose overlayfs layers, although internally, ovl driver
only creates chardev(0,0) to cover lower dentries.
I think that is what Miklos meant?

Now you don't need to implement mknod(c,0,0) in overlayfs.
You need to teach ovl_lookup() about the new whiteout format
(which I think you already did) and the problem you mentioned
w.r.t readdir and DT_CHR is moot as long as the composefs overlayfs,
whose lower layer is the ovl containing overlay.whiteout files
is mounted with the default xino enabled.

Did I miss anything?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 15:57                 ` Amir Goldstein
@ 2023-08-22 16:18                   ` Miklos Szeredi
  2023-08-22 17:29                     ` Amir Goldstein
  2023-08-23  8:51                   ` Alexander Larsson
  1 sibling, 1 reply; 47+ messages in thread
From: Miklos Szeredi @ 2023-08-22 16:18 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Alexander Larsson, linux-unionfs

On Tue, 22 Aug 2023 at 17:57, Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Aug 22, 2023 at 6:43 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 5:31 PM Amir Goldstein <amir73il@gmail.com> wrote:

> > > FWIW, there is also DT_WHT that was defined and never used.
> > > But that is just an anecdote.

Overlay could be the first filesystem to set DT_WHT in its readdir.
I wouldn't mind if others would follow suit, but it's not a high
priority thing.



> > >
> > > Regarding the issue of avoiding getxattr for every dirent.
> > > Note that in readdir, dirent that goes through ovl_cache_update_ino()
> > > calls lookup()/stat() on the overlay itself, so as long as ovl_lookup()
> > > will treat overlay.whiteout file as a whiteout, the code
> > >                  /* Mark a stale entry */
> > >                 p->is_whiteout = true;
> > > will kick in and do the right thing for readdir wrt cleaning up
> > > lower entries covered with whiteouts, regardless of DT_CHR.
> >
> > We don't want to treat this file as a whiteout though. We want it to
> > be exposed as a regular file that looks like a whiteout marker file
> > (i.e. char dev 0,0). Or am I missing something?
> >
>
> Not sure if you really need to emulate chardev(0,0) at all.
>
> Suppose that you just define a new way to express a whiteout -
> an empty regular file with xattr overlay.whiteout.

Oh, you mean overlay.overlay.whiteout on realfile, which gets turned
into overlay.whiteout on bottom overlay, which gets interpreted as a
whiteout on top overlay?

I suppose that would work too, but it's a bit of a layering violation.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 16:18                   ` Miklos Szeredi
@ 2023-08-22 17:29                     ` Amir Goldstein
  2023-09-08  2:21                       ` Gao Xiang
  0 siblings, 1 reply; 47+ messages in thread
From: Amir Goldstein @ 2023-08-22 17:29 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: Alexander Larsson, linux-unionfs

On Tue, Aug 22, 2023 at 7:19 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Tue, 22 Aug 2023 at 17:57, Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 6:43 PM Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Tue, Aug 22, 2023 at 5:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> > > > FWIW, there is also DT_WHT that was defined and never used.
> > > > But that is just an anecdote.
>
> Overlay could be the first filesystem to set DT_WHT in its readdir.
> I wouldn't mind if others would follow suit, but it's not a high
> priority thing.
>
>
>
> > > >
> > > > Regarding the issue of avoiding getxattr for every dirent.
> > > > Note that in readdir, dirent that goes through ovl_cache_update_ino()
> > > > calls lookup()/stat() on the overlay itself, so as long as ovl_lookup()
> > > > will treat overlay.whiteout file as a whiteout, the code
> > > >                  /* Mark a stale entry */
> > > >                 p->is_whiteout = true;
> > > > will kick in and do the right thing for readdir wrt cleaning up
> > > > lower entries covered with whiteouts, regardless of DT_CHR.
> > >
> > > We don't want to treat this file as a whiteout though. We want it to
> > > be exposed as a regular file that looks like a whiteout marker file
> > > (i.e. char dev 0,0). Or am I missing something?
> > >
> >
> > Not sure if you really need to emulate chardev(0,0) at all.
> >
> > Suppose that you just define a new way to express a whiteout -
> > an empty regular file with xattr overlay.whiteout.
>
> Oh, you mean overlay.overlay.whiteout on realfile, which gets turned
> into overlay.whiteout on bottom overlay, which gets interpreted as a
> whiteout on top overlay?
>
> I suppose that would work too, but it's a bit of a layering violation.
>

I proposed to look at the two features independently:
1. Nesting of overlayfs xattrs (patch 3/5)
2. Alternative format for whiteout (overlay.whiteout) that can be used
   by container tools converting OCI/tar images to overlayfs images

Together, they provide a solution to Alexander's use case.

IIUC, the way this is intended to work is that mkfs.composefs
is running inside a container, whose work directory is overlayfs
and it composes some lower layers on that host mounted overlayfs.

mkfs.composefs composes layers with overlay.{metacopy,whiteout,redirect}
xattrs (up to here it is standard mkfs.composefs) and because those layers
are stored in overlayfs, the xattrs are stored in the host fs as
overlay.overlay.*.

I hope I got the use case correctly?
Why is that a layering violation?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 15:57                 ` Amir Goldstein
  2023-08-22 16:18                   ` Miklos Szeredi
@ 2023-08-23  8:51                   ` Alexander Larsson
  1 sibling, 0 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-23  8:51 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, linux-unionfs

On Tue, Aug 22, 2023 at 5:57 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Aug 22, 2023 at 6:43 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 5:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > On Tue, Aug 22, 2023 at 5:36 PM Alexander Larsson <alexl@redhat.com> wrote:
> > > >
> > > > On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
> > > > >
> > > > > On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > > >
> > > > > > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > > >
> > > > > > > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > > > > >
> > > > > > > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > > > > > > to create a whiteout file on an overlay mount and then use that as
> > > > > > > > > part of the lowerdir in another overlay mount.
> > > > > > > > >
> > > > > > > > > The way this works is that we create a regular whiteout, but set the
> > > > > > > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > > > > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > > > > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > > > > > > overlayfs mount it looks like a regular whiteout.
> > > > > > > > >
> > > > > > > >
> > > > > > > > I understand the motivation, but don't have good feelings about the
> > > > > > > > implementation.  Like the xattr escaping this should also have the
> > > > > > > > property that when fed to an old kernel version, it shouldn't
> > > > > > > > interpret this object as a whiteout.  Whether it remains hidden like
> > > > > > > > the escaped xattrs or if it shows up as something else is
> > > > > > > > uninteresting.
> > > > > > > >
> > > > > > > > It could just be a zero sized regular file with "overlay.whiteout".
> > > > > > >
> > > > > > > So, I started doing this, where a whiteout is just a regular file with
> > > > > > > the xattr set. Initially I thought I only needed to check the xattr
> > > > > > > during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> > > > > > > However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> > > > > > > otherwise readdir will report the wrong d_type. To make it worse,
> > > > > > > overlayfs itself looks for DT_CHR to handle whiteouts when listing
> > > > > > > files, so nesting is not working without that.
> > > > > > >
> > > > > > > The only way I see to implement that conversion is to call getxattr()
> > > > > > > on every DT_REG file during readdir(), and while a single getxattr()
> > > > > > > on lookup is fine, I don't think that is.
> > > > > > >
> > > > > > > Any other ideas?
> > > > > >
> > > > > > Not messing with d_type seems a good idea.   How about a random
> > > > > > unreserved chardev?
> > > > >
> > > > > Only the whiteout one (0,0) can be created by non-root users.
> > > >
> > > > I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
> > > > can't store xattrs on such files.
> > >
> > > FWIW, there is also DT_WHT that was defined and never used.
> > > But that is just an anecdote.
> > >
> > > Regarding the issue of avoiding getxattr for every dirent.
> > > Note that in readdir, dirent that goes through ovl_cache_update_ino()
> > > calls lookup()/stat() on the overlay itself, so as long as ovl_lookup()
> > > will treat overlay.whiteout file as a whiteout, the code
> > >                  /* Mark a stale entry */
> > >                 p->is_whiteout = true;
> > > will kick in and do the right thing for readdir wrt cleaning up
> > > lower entries covered with whiteouts, regardless of DT_CHR.
> >
> > We don't want to treat this file as a whiteout though. We want it to
> > be exposed as a regular file that looks like a whiteout marker file
> > (i.e. char dev 0,0). Or am I missing something?
> >
>
> Not sure if you really need to emulate chardev(0,0) at all.
>
> Suppose that you just define a new way to express a whiteout -
> an empty regular file with xattr overlay.whiteout.
>
> Now you could use either chardev(0,0) or overlay.whiteout
> to compose overlayfs layers, although internally, ovl driver
> only creates chardev(0,0) to cover lower dentries.
> I think that is what Miklos meant?
>
> Now you don't need to implement mknod(c,0,0) in overlayfs.
> You need to teach ovl_lookup() about the new whiteout format
> (which I think you already did) and the problem you mentioned
> w.r.t readdir and DT_CHR is moot as long as the composefs overlayfs,
> whose lower layer is the ovl containing overlay.whiteout files
> is mounted with the default xino enabled.

Ah, I understand now. I like this approach, and will try to get it implemented.

> Did I miss anything?

We will see what falls out of testing it.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 15:30             ` Amir Goldstein
  2023-08-22 15:43               ` Alexander Larsson
@ 2023-08-23 13:13               ` Alexander Larsson
  2023-08-23 13:21                 ` Alexander Larsson
                                   ` (2 more replies)
  1 sibling, 3 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-23 13:13 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, linux-unionfs

On Tue, Aug 22, 2023 at 5:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Aug 22, 2023 at 5:36 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > >
> > > > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
> > > > >
> > > > > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > > >
> > > > > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > > >
> > > > > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > > > > to create a whiteout file on an overlay mount and then use that as
> > > > > > > part of the lowerdir in another overlay mount.
> > > > > > >
> > > > > > > The way this works is that we create a regular whiteout, but set the
> > > > > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > > > > overlayfs mount it looks like a regular whiteout.
> > > > > > >
> > > > > >
> > > > > > I understand the motivation, but don't have good feelings about the
> > > > > > implementation.  Like the xattr escaping this should also have the
> > > > > > property that when fed to an old kernel version, it shouldn't
> > > > > > interpret this object as a whiteout.  Whether it remains hidden like
> > > > > > the escaped xattrs or if it shows up as something else is
> > > > > > uninteresting.
> > > > > >
> > > > > > It could just be a zero sized regular file with "overlay.whiteout".
> > > > >
> > > > > So, I started doing this, where a whiteout is just a regular file with
> > > > > the xattr set. Initially I thought I only needed to check the xattr
> > > > > during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> > > > > However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> > > > > otherwise readdir will report the wrong d_type. To make it worse,
> > > > > overlayfs itself looks for DT_CHR to handle whiteouts when listing
> > > > > files, so nesting is not working without that.
> > > > >
> > > > > The only way I see to implement that conversion is to call getxattr()
> > > > > on every DT_REG file during readdir(), and while a single getxattr()
> > > > > on lookup is fine, I don't think that is.
> > > > >
> > > > > Any other ideas?
> > > >
> > > > Not messing with d_type seems a good idea.   How about a random
> > > > unreserved chardev?
> > >
> > > Only the whiteout one (0,0) can be created by non-root users.
> >
> > I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
> > can't store xattrs on such files.
>
> FWIW, there is also DT_WHT that was defined and never used.
> But that is just an anecdote.
>
> Regarding the issue of avoiding getxattr for every dirent.
> Note that in readdir, dirent that goes through ovl_cache_update_ino()
> calls lookup()/stat() on the overlay itself, so as long as ovl_lookup()
> will treat overlay.whiteout file as a whiteout, the code
>                  /* Mark a stale entry */
>                 p->is_whiteout = true;
> will kick in and do the right thing for readdir wrt cleaning up
> lower entries covered with whiteouts, regardless of DT_CHR.
>
> Now all that is left is to make sure that ovl_cache_update_ino()
> is called if there are possibly overlay.whiteout files.
>
> For the case of nested ovl, upper and lower fs cannot be the same,
> so ovl_same_fs() is false.
> Therefore, as long as xino is enabled (this is the default),
> ovl_same_dev() is true => ovl_xino_bits() > 0 =>
> ovl_calc_d_ino() is true and ovl_cache_update_ino() will be
> called for all merged dirents.
>
> IOW, unless I am missing something, if you implement overlay.whiteout
> logic in ovl_lookup() correctly, readdir should "just work" as long as
> the mounter does not explicitly use -o xino=off.

Things are not that rosy for this.

First of all, the default value for OVERLAY_FS_XINO_AUTO is no, so by
default xino is not enabled. This means that overlay.whiteout only
works if you enable xino=on/auto in the mount.

Secondly, in the case where all upper and lower are on the same fs,
even if xino is enabled it is ignored. This is not the  case where the
lower is on a nested  overlayfs as you say, but maybe we want to
create a lower dir that works on both when stored on a overlayfs and
elsewere. Relying on xino which is essentially unrelated to whiteouts
to get enabled seems quite fragile.

I'm gonna instead try to make the alternative whiteout be a fifo with
an xattr. Then we can add DT_FIFO to the maybe_whiteout case during
readdir() which will then get picked up as a whiteout without relying
on xino.

I still have an issue with this approach though. Suppose we have a
rootfs that we want to make available as an overlayfs lower (using
trusted.overlay.*). We can escape all the xattrs in the rootfs with
trusted.overlay.overlay to make them show up correctly. However,
suppose the rootfs contains a char(0.0) whiteout. how do we encode
this? We can use a file with trusted.overlay.overlay.whiteout, which
then will be visible as trusted.overlay.whiteout in the overlayfs
mount. This is normally functionally equivalent to the char(0,0)
whiteout, but not if the user of the rootfs mounts the second
overlayfs using userxattr.

Maybe it could convert a char(0,0) to a file with both
trusted.overlay.overlay.whiteout and user.overlay.whiteout? The the
resulting overlayfs file could be used as a whiteout for both types,
just like the char node...

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-23 13:13               ` Alexander Larsson
@ 2023-08-23 13:21                 ` Alexander Larsson
  2023-08-23 14:42                 ` Amir Goldstein
  2023-08-23 14:50                 ` Alexander Larsson
  2 siblings, 0 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-23 13:21 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, linux-unionfs

On Wed, Aug 23, 2023 at 3:13 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Tue, Aug 22, 2023 at 5:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 5:36 PM Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
> > > >
> > > > On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > >
> > > > > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > >
> > > > > > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > > > >
> > > > > > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > > > >
> > > > > > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > > > > > to create a whiteout file on an overlay mount and then use that as
> > > > > > > > part of the lowerdir in another overlay mount.
> > > > > > > >
> > > > > > > > The way this works is that we create a regular whiteout, but set the
> > > > > > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > > > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > > > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > > > > > overlayfs mount it looks like a regular whiteout.
> > > > > > > >
> > > > > > >
> > > > > > > I understand the motivation, but don't have good feelings about the
> > > > > > > implementation.  Like the xattr escaping this should also have the
> > > > > > > property that when fed to an old kernel version, it shouldn't
> > > > > > > interpret this object as a whiteout.  Whether it remains hidden like
> > > > > > > the escaped xattrs or if it shows up as something else is
> > > > > > > uninteresting.
> > > > > > >
> > > > > > > It could just be a zero sized regular file with "overlay.whiteout".
> > > > > >
> > > > > > So, I started doing this, where a whiteout is just a regular file with
> > > > > > the xattr set. Initially I thought I only needed to check the xattr
> > > > > > during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> > > > > > However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> > > > > > otherwise readdir will report the wrong d_type. To make it worse,
> > > > > > overlayfs itself looks for DT_CHR to handle whiteouts when listing
> > > > > > files, so nesting is not working without that.
> > > > > >
> > > > > > The only way I see to implement that conversion is to call getxattr()
> > > > > > on every DT_REG file during readdir(), and while a single getxattr()
> > > > > > on lookup is fine, I don't think that is.
> > > > > >
> > > > > > Any other ideas?
> > > > >
> > > > > Not messing with d_type seems a good idea.   How about a random
> > > > > unreserved chardev?
> > > >
> > > > Only the whiteout one (0,0) can be created by non-root users.
> > >
> > > I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
> > > can't store xattrs on such files.
> >
> > FWIW, there is also DT_WHT that was defined and never used.
> > But that is just an anecdote.
> >
> > Regarding the issue of avoiding getxattr for every dirent.
> > Note that in readdir, dirent that goes through ovl_cache_update_ino()
> > calls lookup()/stat() on the overlay itself, so as long as ovl_lookup()
> > will treat overlay.whiteout file as a whiteout, the code
> >                  /* Mark a stale entry */
> >                 p->is_whiteout = true;
> > will kick in and do the right thing for readdir wrt cleaning up
> > lower entries covered with whiteouts, regardless of DT_CHR.
> >
> > Now all that is left is to make sure that ovl_cache_update_ino()
> > is called if there are possibly overlay.whiteout files.
> >
> > For the case of nested ovl, upper and lower fs cannot be the same,
> > so ovl_same_fs() is false.
> > Therefore, as long as xino is enabled (this is the default),
> > ovl_same_dev() is true => ovl_xino_bits() > 0 =>
> > ovl_calc_d_ino() is true and ovl_cache_update_ino() will be
> > called for all merged dirents.
> >
> > IOW, unless I am missing something, if you implement overlay.whiteout
> > logic in ovl_lookup() correctly, readdir should "just work" as long as
> > the mounter does not explicitly use -o xino=off.
>
> Things are not that rosy for this.
>
> First of all, the default value for OVERLAY_FS_XINO_AUTO is no, so by
> default xino is not enabled. This means that overlay.whiteout only
> works if you enable xino=on/auto in the mount.
>
> Secondly, in the case where all upper and lower are on the same fs,
> even if xino is enabled it is ignored. This is not the  case where the
> lower is on a nested  overlayfs as you say, but maybe we want to
> create a lower dir that works on both when stored on a overlayfs and
> elsewere. Relying on xino which is essentially unrelated to whiteouts
> to get enabled seems quite fragile.
>
> I'm gonna instead try to make the alternative whiteout be a fifo with
> an xattr. Then we can add DT_FIFO to the maybe_whiteout case during
> readdir() which will then get picked up as a whiteout without relying
> on xino.
>
> I still have an issue with this approach though. Suppose we have a
> rootfs that we want to make available as an overlayfs lower (using
> trusted.overlay.*). We can escape all the xattrs in the rootfs with
> trusted.overlay.overlay to make them show up correctly. However,
> suppose the rootfs contains a char(0.0) whiteout. how do we encode
> this? We can use a file with trusted.overlay.overlay.whiteout, which
> then will be visible as trusted.overlay.whiteout in the overlayfs
> mount. This is normally functionally equivalent to the char(0,0)
> whiteout, but not if the user of the rootfs mounts the second
> overlayfs using userxattr.
>
> Maybe it could convert a char(0,0) to a file with both
> trusted.overlay.overlay.whiteout and user.overlay.whiteout? The the
> resulting overlayfs file could be used as a whiteout for both types,
> just like the char node...

Ah, no. that won't work because we can't set a user xattr on a fifo.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-23 13:13               ` Alexander Larsson
  2023-08-23 13:21                 ` Alexander Larsson
@ 2023-08-23 14:42                 ` Amir Goldstein
  2023-08-23 14:52                   ` Miklos Szeredi
  2023-08-23 14:50                 ` Alexander Larsson
  2 siblings, 1 reply; 47+ messages in thread
From: Amir Goldstein @ 2023-08-23 14:42 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs

On Wed, Aug 23, 2023 at 4:13 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Tue, Aug 22, 2023 at 5:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 5:36 PM Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
> > > >
> > > > On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > >
> > > > > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > >
> > > > > > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > > > >
> > > > > > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > > > >
> > > > > > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > > > > > to create a whiteout file on an overlay mount and then use that as
> > > > > > > > part of the lowerdir in another overlay mount.
> > > > > > > >
> > > > > > > > The way this works is that we create a regular whiteout, but set the
> > > > > > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > > > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > > > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > > > > > overlayfs mount it looks like a regular whiteout.
> > > > > > > >
> > > > > > >
> > > > > > > I understand the motivation, but don't have good feelings about the
> > > > > > > implementation.  Like the xattr escaping this should also have the
> > > > > > > property that when fed to an old kernel version, it shouldn't
> > > > > > > interpret this object as a whiteout.  Whether it remains hidden like
> > > > > > > the escaped xattrs or if it shows up as something else is
> > > > > > > uninteresting.
> > > > > > >
> > > > > > > It could just be a zero sized regular file with "overlay.whiteout".
> > > > > >
> > > > > > So, I started doing this, where a whiteout is just a regular file with
> > > > > > the xattr set. Initially I thought I only needed to check the xattr
> > > > > > during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> > > > > > However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> > > > > > otherwise readdir will report the wrong d_type. To make it worse,
> > > > > > overlayfs itself looks for DT_CHR to handle whiteouts when listing
> > > > > > files, so nesting is not working without that.
> > > > > >
> > > > > > The only way I see to implement that conversion is to call getxattr()
> > > > > > on every DT_REG file during readdir(), and while a single getxattr()
> > > > > > on lookup is fine, I don't think that is.
> > > > > >
> > > > > > Any other ideas?
> > > > >
> > > > > Not messing with d_type seems a good idea.   How about a random
> > > > > unreserved chardev?
> > > >
> > > > Only the whiteout one (0,0) can be created by non-root users.
> > >
> > > I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
> > > can't store xattrs on such files.
> >
> > FWIW, there is also DT_WHT that was defined and never used.
> > But that is just an anecdote.
> >
> > Regarding the issue of avoiding getxattr for every dirent.
> > Note that in readdir, dirent that goes through ovl_cache_update_ino()
> > calls lookup()/stat() on the overlay itself, so as long as ovl_lookup()
> > will treat overlay.whiteout file as a whiteout, the code
> >                  /* Mark a stale entry */
> >                 p->is_whiteout = true;
> > will kick in and do the right thing for readdir wrt cleaning up
> > lower entries covered with whiteouts, regardless of DT_CHR.
> >
> > Now all that is left is to make sure that ovl_cache_update_ino()
> > is called if there are possibly overlay.whiteout files.
> >
> > For the case of nested ovl, upper and lower fs cannot be the same,
> > so ovl_same_fs() is false.
> > Therefore, as long as xino is enabled (this is the default),
> > ovl_same_dev() is true => ovl_xino_bits() > 0 =>
> > ovl_calc_d_ino() is true and ovl_cache_update_ino() will be
> > called for all merged dirents.
> >
> > IOW, unless I am missing something, if you implement overlay.whiteout
> > logic in ovl_lookup() correctly, readdir should "just work" as long as
> > the mounter does not explicitly use -o xino=off.
>
> Things are not that rosy for this.
>
> First of all, the default value for OVERLAY_FS_XINO_AUTO is no, so by
> default xino is not enabled. This means that overlay.whiteout only
> works if you enable xino=on/auto in the mount.
>
> Secondly, in the case where all upper and lower are on the same fs,
> even if xino is enabled it is ignored. This is not the  case where the
> lower is on a nested  overlayfs as you say, but maybe we want to
> create a lower dir that works on both when stored on a overlayfs and
> elsewere. Relying on xino which is essentially unrelated to whiteouts
> to get enabled seems quite fragile.
>

All right. But you do not have to rely on xino && !ovl_same_fs().
What you need is for ovl_calc_d_ino() to be true.

For example, consider this (edited) condition in ovl_calc_d_ino():
        /*
         * Recalc d_ino for all entries if dir is impure (contains
         * copied up entries)
         */
        if (ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
                return true;

It decided to lookup the entries based on a property of the parent dir.

Specifically, this check will always be true for merge dirs that have an
upper dir with non-zero copied-up files.
But you can use the some principle to "taint" directories that contain
"xattr_whiteouts" in a similar manner to "impurity" and that would
be enough for ovl_calc_d_ino() to do the right thing also for merge dirs
with no upper dir.

If we do this, then both overlay.whiteout and overlay.xattr_whiteouts
xattrs will be xattrs that the overlayfs driver never sets.
It's a precedent, but as long as it is properly documented and encoded
in fstests, I will be fine with it. Not sure about Miklos.

Are you ok with leaving the responsibility to mark the directories as
overlay.xattr_whiteouts to mkfs.composefs?
Does this solution work for you?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-23 13:13               ` Alexander Larsson
  2023-08-23 13:21                 ` Alexander Larsson
  2023-08-23 14:42                 ` Amir Goldstein
@ 2023-08-23 14:50                 ` Alexander Larsson
  2 siblings, 0 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-23 14:50 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, linux-unionfs

On Wed, Aug 23, 2023 at 3:13 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Tue, Aug 22, 2023 at 5:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 5:36 PM Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Tue, Aug 22, 2023 at 4:25 PM Alexander Larsson <alexl@redhat.com> wrote:
> > > >
> > > > On Tue, Aug 22, 2023 at 3:56 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > >
> > > > > On Tue, 22 Aug 2023 at 15:22, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > >
> > > > > > On Mon, Aug 21, 2023 at 1:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > > > >
> > > > > > > On Thu, 17 Aug 2023 at 13:05, Alexander Larsson <alexl@redhat.com> wrote:
> > > > > > > >
> > > > > > > > This is needed to properly stack overlay filesystems, I.E, being able
> > > > > > > > to create a whiteout file on an overlay mount and then use that as
> > > > > > > > part of the lowerdir in another overlay mount.
> > > > > > > >
> > > > > > > > The way this works is that we create a regular whiteout, but set the
> > > > > > > > `overlay.nowhiteout` xattr on it. Whenever we check if a file is a
> > > > > > > > whiteout we check this xattr and don't treat it as a whiteout if it is
> > > > > > > > set. The xattr itself is then stripped and when viewed as part of the
> > > > > > > > overlayfs mount it looks like a regular whiteout.
> > > > > > > >
> > > > > > >
> > > > > > > I understand the motivation, but don't have good feelings about the
> > > > > > > implementation.  Like the xattr escaping this should also have the
> > > > > > > property that when fed to an old kernel version, it shouldn't
> > > > > > > interpret this object as a whiteout.  Whether it remains hidden like
> > > > > > > the escaped xattrs or if it shows up as something else is
> > > > > > > uninteresting.
> > > > > > >
> > > > > > > It could just be a zero sized regular file with "overlay.whiteout".
> > > > > >
> > > > > > So, I started doing this, where a whiteout is just a regular file with
> > > > > > the xattr set. Initially I thought I only needed to check the xattr
> > > > > > during lookup and convert the inode mode from S_IFREG to S_IFCHR.
> > > > > > However, I also need to hook up readdir and convert DT_REG to DT_CHR,
> > > > > > otherwise readdir will report the wrong d_type. To make it worse,
> > > > > > overlayfs itself looks for DT_CHR to handle whiteouts when listing
> > > > > > files, so nesting is not working without that.
> > > > > >
> > > > > > The only way I see to implement that conversion is to call getxattr()
> > > > > > on every DT_REG file during readdir(), and while a single getxattr()
> > > > > > on lookup is fine, I don't think that is.
> > > > > >
> > > > > > Any other ideas?
> > > > >
> > > > > Not messing with d_type seems a good idea.   How about a random
> > > > > unreserved chardev?
> > > >
> > > > Only the whiteout one (0,0) can be created by non-root users.
> > >
> > > I was thinking of (ab)using DT_SOCK or DT_FIFO, but turns out you
> > > can't store xattrs on such files.
> >
> > FWIW, there is also DT_WHT that was defined and never used.
> > But that is just an anecdote.
> >
> > Regarding the issue of avoiding getxattr for every dirent.
> > Note that in readdir, dirent that goes through ovl_cache_update_ino()
> > calls lookup()/stat() on the overlay itself, so as long as ovl_lookup()
> > will treat overlay.whiteout file as a whiteout, the code
> >                  /* Mark a stale entry */
> >                 p->is_whiteout = true;
> > will kick in and do the right thing for readdir wrt cleaning up
> > lower entries covered with whiteouts, regardless of DT_CHR.
> >
> > Now all that is left is to make sure that ovl_cache_update_ino()
> > is called if there are possibly overlay.whiteout files.
> >
> > For the case of nested ovl, upper and lower fs cannot be the same,
> > so ovl_same_fs() is false.
> > Therefore, as long as xino is enabled (this is the default),
> > ovl_same_dev() is true => ovl_xino_bits() > 0 =>
> > ovl_calc_d_ino() is true and ovl_cache_update_ino() will be
> > called for all merged dirents.
> >
> > IOW, unless I am missing something, if you implement overlay.whiteout
> > logic in ovl_lookup() correctly, readdir should "just work" as long as
> > the mounter does not explicitly use -o xino=off.
>
> Things are not that rosy for this.
>
> First of all, the default value for OVERLAY_FS_XINO_AUTO is no, so by
> default xino is not enabled. This means that overlay.whiteout only
> works if you enable xino=on/auto in the mount.
>
> Secondly, in the case where all upper and lower are on the same fs,
> even if xino is enabled it is ignored. This is not the  case where the
> lower is on a nested  overlayfs as you say, but maybe we want to
> create a lower dir that works on both when stored on a overlayfs and
> elsewere. Relying on xino which is essentially unrelated to whiteouts
> to get enabled seems quite fragile.
>
> I'm gonna instead try to make the alternative whiteout be a fifo with
> an xattr. Then we can add DT_FIFO to the maybe_whiteout case during
> readdir() which will then get picked up as a whiteout without relying
> on xino.

Ok, here are two approaches:

A fifo with overlay.whiteout is treated as an alternative kind of whiteout:
  https://github.com/alexlarsson/linux/commit/8392cac01ef10cd9ad53203fb3c7b381e1ecae26

A fifo with overlay.whiteout is treated gets "unescaped" to a regular whiteout:
  https://github.com/alexlarsson/linux/commit/7d7f77c1541f60213abc00412c176a47fd5bc046

Opinions on these? I like the second one better.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-23 14:42                 ` Amir Goldstein
@ 2023-08-23 14:52                   ` Miklos Szeredi
  2023-08-23 15:02                     ` Alexander Larsson
  2023-08-23 15:50                     ` Amir Goldstein
  0 siblings, 2 replies; 47+ messages in thread
From: Miklos Szeredi @ 2023-08-23 14:52 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Alexander Larsson, linux-unionfs

On Wed, 23 Aug 2023 at 16:43, Amir Goldstein <amir73il@gmail.com> wrote:

> If we do this, then both overlay.whiteout and overlay.xattr_whiteouts
> xattrs will be xattrs that the overlayfs driver never sets.
> It's a precedent, but as long as it is properly documented and encoded
> in fstests, I will be fine with it. Not sure about Miklos.

Firstly I need to properly understand the proposal.  At this point I'm
not sure what overlay.whiteout is supposed to mean.   Does it mean the
same as a whiteout (chrdev(0,0))?  Or does it mean that overlayfs
should not treat it as a whiteout, but instead transform that into a
chrdev(0,0) for the top overlay to interpret as a whiteout?  Or
something else?

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-23 14:52                   ` Miklos Szeredi
@ 2023-08-23 15:02                     ` Alexander Larsson
  2023-08-23 15:50                     ` Amir Goldstein
  1 sibling, 0 replies; 47+ messages in thread
From: Alexander Larsson @ 2023-08-23 15:02 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: Amir Goldstein, linux-unionfs

On Wed, Aug 23, 2023 at 4:52 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Wed, 23 Aug 2023 at 16:43, Amir Goldstein <amir73il@gmail.com> wrote:
>
> > If we do this, then both overlay.whiteout and overlay.xattr_whiteouts
> > xattrs will be xattrs that the overlayfs driver never sets.
> > It's a precedent, but as long as it is properly documented and encoded
> > in fstests, I will be fine with it. Not sure about Miklos.
>
> Firstly I need to properly understand the proposal.  At this point I'm
> not sure what overlay.whiteout is supposed to mean.   Does it mean the
> same as a whiteout (chrdev(0,0))?  Or does it mean that overlayfs
> should not treat it as a whiteout, but instead transform that into a
> chrdev(0,0) for the top overlay to interpret as a whiteout?  Or
> something else?

Both of these have been discussed, and I tried both in my two
alternative proposals in the other mail I just sent. Also, both
approaches could use the taint-the-directory xattr approach amir
proposed here.

I think the overlay.whiteout transforms to chrdev(0,0) approach is
better, because that will allow the resulting unescaped whiteout to be
used by both regular and userxattr overlay mounts.

I'll have a go at combining the transform approach with a directory
xattr, because that could use regular files that get transformed to
whiteouts, which means it could also work with user.overlay.whiteout
xattrs.


-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-23 14:52                   ` Miklos Szeredi
  2023-08-23 15:02                     ` Alexander Larsson
@ 2023-08-23 15:50                     ` Amir Goldstein
  2023-08-24  9:56                       ` Alexander Larsson
  1 sibling, 1 reply; 47+ messages in thread
From: Amir Goldstein @ 2023-08-23 15:50 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: Alexander Larsson, linux-unionfs

On Wed, Aug 23, 2023 at 5:52 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Wed, 23 Aug 2023 at 16:43, Amir Goldstein <amir73il@gmail.com> wrote:
>
> > If we do this, then both overlay.whiteout and overlay.xattr_whiteouts
> > xattrs will be xattrs that the overlayfs driver never sets.
> > It's a precedent, but as long as it is properly documented and encoded
> > in fstests, I will be fine with it. Not sure about Miklos.
>
> Firstly I need to properly understand the proposal.  At this point I'm
> not sure what overlay.whiteout is supposed to mean.   Does it mean the
> same as a whiteout (chrdev(0,0))?  Or does it mean that overlayfs
> should not treat it as a whiteout, but instead transform that into a
> chrdev(0,0) for the top overlay to interpret as a whiteout?  Or
> something else?
>

My proposal does not involve any transformation.
It is "just" to support another format for a whiteout.
Transforming a REG or FIFO real object to CHR ovl object
will be a pain IMO and I don't see why it is needed.

Let me try again from the top:
1. ovl_path_is_whiteout() checks if either ovl_is_whiteout() (chardev(0,0))
    or regular file with "overlay.whiteout" xattr, so ovl_lookup()
will result in
    a negative dentry if either whiteout format is found at topmost layer
2. To optimize unneeded getxattr, "overlay.whiteout" xattr could be checked
    only in case the parent dir has xattr "overlay.xattr_whiteouts"
3. mkfs.composefs is responsible of creating the non-chardev whiteouts
    as well as the marking the dirs that contains them with
    "overlay.xattr_whiteouts" - overlayfs itself never does that
4. ovl_calc_d_ino() (from readdir on a merge dir) returns true if the
    the iterated dir has "overlay.xattr_whiteouts" xattr
5. That will cause ovl_cache_update_ino() to lookup the
    *overlay dentry* that will be negative (as per rule 1 above)
    if either whiteout format is found at topmost layer and that
    will cause the readdir dirent to be marked is_whiteout and
    filtered out of readdir results

* The trick for readdir is that the the per dirent DT_CHR optimization
  is traded off for a per parent dir optimization, but even the worst case
  where all directories have xattr_whiteouts, readdir is not more
  expensive than readdir with xino enabled, which is the default for
  several Linux distros

Hope this is more clear?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-23 15:50                     ` Amir Goldstein
@ 2023-08-24  9:56                       ` Alexander Larsson
  2023-08-24 11:43                         ` Amir Goldstein
  0 siblings, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-24  9:56 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, linux-unionfs

On Wed, Aug 23, 2023 at 5:50 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Wed, Aug 23, 2023 at 5:52 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> >
> > On Wed, 23 Aug 2023 at 16:43, Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > > If we do this, then both overlay.whiteout and overlay.xattr_whiteouts
> > > xattrs will be xattrs that the overlayfs driver never sets.
> > > It's a precedent, but as long as it is properly documented and encoded
> > > in fstests, I will be fine with it. Not sure about Miklos.
> >
> > Firstly I need to properly understand the proposal.  At this point I'm
> > not sure what overlay.whiteout is supposed to mean.   Does it mean the
> > same as a whiteout (chrdev(0,0))?  Or does it mean that overlayfs
> > should not treat it as a whiteout, but instead transform that into a
> > chrdev(0,0) for the top overlay to interpret as a whiteout?  Or
> > something else?
> >
>
> My proposal does not involve any transformation.
> It is "just" to support another format for a whiteout.
> Transforming a REG or FIFO real object to CHR ovl object
> will be a pain IMO and I don't see why it is needed.
>
> Let me try again from the top:
> 1. ovl_path_is_whiteout() checks if either ovl_is_whiteout() (chardev(0,0))
>     or regular file with "overlay.whiteout" xattr, so ovl_lookup()
> will result in
>     a negative dentry if either whiteout format is found at topmost layer
> 2. To optimize unneeded getxattr, "overlay.whiteout" xattr could be checked
>     only in case the parent dir has xattr "overlay.xattr_whiteouts"
> 3. mkfs.composefs is responsible of creating the non-chardev whiteouts
>     as well as the marking the dirs that contains them with
>     "overlay.xattr_whiteouts" - overlayfs itself never does that
> 4. ovl_calc_d_ino() (from readdir on a merge dir) returns true if the
>     the iterated dir has "overlay.xattr_whiteouts" xattr
> 5. That will cause ovl_cache_update_ino() to lookup the
>     *overlay dentry* that will be negative (as per rule 1 above)
>     if either whiteout format is found at topmost layer and that
>     will cause the readdir dirent to be marked is_whiteout and
>     filtered out of readdir results
>
> * The trick for readdir is that the the per dirent DT_CHR optimization
>   is traded off for a per parent dir optimization, but even the worst case
>   where all directories have xattr_whiteouts, readdir is not more
>   expensive than readdir with xino enabled, which is the default for
>   several Linux distros
>
> Hope this is more clear?

Ok, so I implemented this, both using the transforming-to-whiteout and
the alternative-whiteout approach.

Here is the transform-to-whiteout approach:
  https://github.com/alexlarsson/linux/tree/ovl-nesting-transform

In it, if you have a lower dir with these files/xattrs:
 * lowerdir/foo - directory
     trusted.overlay.whiteouts
 * lowerdir/foo/hide_file - regular file
     trusted.overlay.whiteout

Then you will get an overlay no-userxattr mount like this:
 * lowerdir/foo - directory
 * lowerdir/foo/hide_file - chardev(0,0) file

This can be used as a lower in any overlayfs mount you want, userxattr or no.

Here is the alternative-whiteout approach:
 https://github.com/alexlarsson/linux/tree/ovl-nesting-alternative

In it, if you have a lower dir with these files/xattrs:
 * lowerdir/foo - directory
     trusted.overlay.overlay.whiteouts
     user.overlay.whiteouts
  * lowerdir/foo/hide_file - regular file
     trusted.overlay.overlay.whiteout
     user.overlay.whiteout

Then you will get an overlay no-userxattr mount like this:
 * lowerdir/foo - directory
     trusted.overlay.whiteouts
     user.overlay.whiteouts
  * lowerdir/foo/hide_file - regular file
     trusted.overlay.whiteout
     user.overlay.whiteout

This can be used as a lower in any overlayfs mount you want, userxattr or no.

I prefer the transform-to-whiteout approach for a two reasons:

Given an existing image (say an OCI image) we can construct an
overlayfs mount that is not just functionally identical, but also
indistinguible from the expected one. I.e. if the original OCI image
had a chardev(0,0) we will still have one in the mount.

When creating multiple lower dirs (e.g. from a multi-layered OCI
image) you have to carry forward xattrs on directories from the lower
layers to the upper, otherwise a merged directory from a higher layer
will overwrite the "overlay.whiteouts" xattr. This makes an otherwise
local operation (just escape the files in this layer) to a global one
that depends on all layers.

In terms of implementation complexity I think they are very similar.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-24  9:56                       ` Alexander Larsson
@ 2023-08-24 11:43                         ` Amir Goldstein
  2023-08-24 14:22                           ` Alexander Larsson
  0 siblings, 1 reply; 47+ messages in thread
From: Amir Goldstein @ 2023-08-24 11:43 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs

On Thu, Aug 24, 2023 at 12:56 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Wed, Aug 23, 2023 at 5:50 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Wed, Aug 23, 2023 at 5:52 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > >
> > > On Wed, 23 Aug 2023 at 16:43, Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > > If we do this, then both overlay.whiteout and overlay.xattr_whiteouts
> > > > xattrs will be xattrs that the overlayfs driver never sets.
> > > > It's a precedent, but as long as it is properly documented and encoded
> > > > in fstests, I will be fine with it. Not sure about Miklos.
> > >
> > > Firstly I need to properly understand the proposal.  At this point I'm
> > > not sure what overlay.whiteout is supposed to mean.   Does it mean the
> > > same as a whiteout (chrdev(0,0))?  Or does it mean that overlayfs
> > > should not treat it as a whiteout, but instead transform that into a
> > > chrdev(0,0) for the top overlay to interpret as a whiteout?  Or
> > > something else?
> > >
> >
> > My proposal does not involve any transformation.
> > It is "just" to support another format for a whiteout.
> > Transforming a REG or FIFO real object to CHR ovl object
> > will be a pain IMO and I don't see why it is needed.
> >
> > Let me try again from the top:
> > 1. ovl_path_is_whiteout() checks if either ovl_is_whiteout() (chardev(0,0))
> >     or regular file with "overlay.whiteout" xattr, so ovl_lookup()
> > will result in
> >     a negative dentry if either whiteout format is found at topmost layer
> > 2. To optimize unneeded getxattr, "overlay.whiteout" xattr could be checked
> >     only in case the parent dir has xattr "overlay.xattr_whiteouts"
> > 3. mkfs.composefs is responsible of creating the non-chardev whiteouts
> >     as well as the marking the dirs that contains them with
> >     "overlay.xattr_whiteouts" - overlayfs itself never does that
> > 4. ovl_calc_d_ino() (from readdir on a merge dir) returns true if the
> >     the iterated dir has "overlay.xattr_whiteouts" xattr
> > 5. That will cause ovl_cache_update_ino() to lookup the
> >     *overlay dentry* that will be negative (as per rule 1 above)
> >     if either whiteout format is found at topmost layer and that
> >     will cause the readdir dirent to be marked is_whiteout and
> >     filtered out of readdir results
> >
> > * The trick for readdir is that the the per dirent DT_CHR optimization
> >   is traded off for a per parent dir optimization, but even the worst case
> >   where all directories have xattr_whiteouts, readdir is not more
> >   expensive than readdir with xino enabled, which is the default for
> >   several Linux distros
> >
> > Hope this is more clear?
>
> Ok, so I implemented this, both using the transforming-to-whiteout and
> the alternative-whiteout approach.
>
> Here is the transform-to-whiteout approach:
>   https://github.com/alexlarsson/linux/tree/ovl-nesting-transform
>
> In it, if you have a lower dir with these files/xattrs:
>  * lowerdir/foo - directory
>      trusted.overlay.whiteouts
>  * lowerdir/foo/hide_file - regular file
>      trusted.overlay.whiteout
>
> Then you will get an overlay no-userxattr mount like this:
>  * lowerdir/foo - directory
>  * lowerdir/foo/hide_file - chardev(0,0) file
>
> This can be used as a lower in any overlayfs mount you want, userxattr or no.
>
> Here is the alternative-whiteout approach:
>  https://github.com/alexlarsson/linux/tree/ovl-nesting-alternative
>
> In it, if you have a lower dir with these files/xattrs:
>  * lowerdir/foo - directory
>      trusted.overlay.overlay.whiteouts
>      user.overlay.whiteouts
>   * lowerdir/foo/hide_file - regular file
>      trusted.overlay.overlay.whiteout
>      user.overlay.whiteout
>
> Then you will get an overlay no-userxattr mount like this:
>  * lowerdir/foo - directory
>      trusted.overlay.whiteouts
>      user.overlay.whiteouts
>   * lowerdir/foo/hide_file - regular file
>      trusted.overlay.whiteout
>      user.overlay.whiteout
>
> This can be used as a lower in any overlayfs mount you want, userxattr or no.
>
> I prefer the transform-to-whiteout approach for a two reasons:
>
> Given an existing image (say an OCI image) we can construct an
> overlayfs mount that is not just functionally identical, but also
> indistinguible from the expected one. I.e. if the original OCI image
> had a chardev(0,0) we will still have one in the mount.
>

I thought that OCI image format is using a different without format
which is converted to ovl whiteout format anyway:
https://github.com/opencontainers/image-spec/blob/main/layer.md#whiteouts

Also, IIUC, you want this feature for mkfs.composefs, which doesn't
have backward compat requirements and doesn't need to create
ovl images identical to existing ones. Please correct me if I am wrong.

> When creating multiple lower dirs (e.g. from a multi-layered OCI
> image) you have to carry forward xattrs on directories from the lower
> layers to the upper, otherwise a merged directory from a higher layer
> will overwrite the "overlay.whiteouts" xattr. This makes an otherwise
> local operation (just escape the files in this layer) to a global one
> that depends on all layers.

I don't understand this claim. In you implementation:
rdd->in_xwhiteouts_dir =
   ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
checks the "overlay.whiteouts" xattr on every layer.

Checking if an entry is a whiteout only matters in the uppermost layer
that this named entry is found.

My understanding is that is mkfs.composefs creates a xwhiteout file,
it ONLY needs to locally set the overlay.whiteouts xattr on its parent
dir. I don't understand how multi layers matter.

>
> In terms of implementation complexity I think they are very similar.
>

Sorry. I disagree. The two implementations may be on par wrt
lines of code, but I perceive the transform-to-whiteout patch as
harder to maintain.

The fact that an xattr changes the file type seems terrifying to me.
Imagine that upper layer has a non-empty regular file with
overlay.whiteout xattr - this is exposed as a regular file and then
user truncates this regular file - BOOM (turn to whiteout?).

I need to understand your concerns about my proposal because
I did not get them.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-24 11:43                         ` Amir Goldstein
@ 2023-08-24 14:22                           ` Alexander Larsson
  2023-08-25  8:40                             ` Amir Goldstein
  0 siblings, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-24 14:22 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, linux-unionfs

On Thu, Aug 24, 2023 at 1:43 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Thu, Aug 24, 2023 at 12:56 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Wed, Aug 23, 2023 at 5:50 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > On Wed, Aug 23, 2023 at 5:52 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > >
> > > > On Wed, 23 Aug 2023 at 16:43, Amir Goldstein <amir73il@gmail.com> wrote:
> > > >
> > > > > If we do this, then both overlay.whiteout and overlay.xattr_whiteouts
> > > > > xattrs will be xattrs that the overlayfs driver never sets.
> > > > > It's a precedent, but as long as it is properly documented and encoded
> > > > > in fstests, I will be fine with it. Not sure about Miklos.
> > > >
> > > > Firstly I need to properly understand the proposal.  At this point I'm
> > > > not sure what overlay.whiteout is supposed to mean.   Does it mean the
> > > > same as a whiteout (chrdev(0,0))?  Or does it mean that overlayfs
> > > > should not treat it as a whiteout, but instead transform that into a
> > > > chrdev(0,0) for the top overlay to interpret as a whiteout?  Or
> > > > something else?
> > > >
> > >
> > > My proposal does not involve any transformation.
> > > It is "just" to support another format for a whiteout.
> > > Transforming a REG or FIFO real object to CHR ovl object
> > > will be a pain IMO and I don't see why it is needed.
> > >
> > > Let me try again from the top:
> > > 1. ovl_path_is_whiteout() checks if either ovl_is_whiteout() (chardev(0,0))
> > >     or regular file with "overlay.whiteout" xattr, so ovl_lookup()
> > > will result in
> > >     a negative dentry if either whiteout format is found at topmost layer
> > > 2. To optimize unneeded getxattr, "overlay.whiteout" xattr could be checked
> > >     only in case the parent dir has xattr "overlay.xattr_whiteouts"
> > > 3. mkfs.composefs is responsible of creating the non-chardev whiteouts
> > >     as well as the marking the dirs that contains them with
> > >     "overlay.xattr_whiteouts" - overlayfs itself never does that
> > > 4. ovl_calc_d_ino() (from readdir on a merge dir) returns true if the
> > >     the iterated dir has "overlay.xattr_whiteouts" xattr
> > > 5. That will cause ovl_cache_update_ino() to lookup the
> > >     *overlay dentry* that will be negative (as per rule 1 above)
> > >     if either whiteout format is found at topmost layer and that
> > >     will cause the readdir dirent to be marked is_whiteout and
> > >     filtered out of readdir results
> > >
> > > * The trick for readdir is that the the per dirent DT_CHR optimization
> > >   is traded off for a per parent dir optimization, but even the worst case
> > >   where all directories have xattr_whiteouts, readdir is not more
> > >   expensive than readdir with xino enabled, which is the default for
> > >   several Linux distros
> > >
> > > Hope this is more clear?
> >
> > Ok, so I implemented this, both using the transforming-to-whiteout and
> > the alternative-whiteout approach.
> >
> > Here is the transform-to-whiteout approach:
> >   https://github.com/alexlarsson/linux/tree/ovl-nesting-transform
> >
> > In it, if you have a lower dir with these files/xattrs:
> >  * lowerdir/foo - directory
> >      trusted.overlay.whiteouts
> >  * lowerdir/foo/hide_file - regular file
> >      trusted.overlay.whiteout
> >
> > Then you will get an overlay no-userxattr mount like this:
> >  * lowerdir/foo - directory
> >  * lowerdir/foo/hide_file - chardev(0,0) file
> >
> > This can be used as a lower in any overlayfs mount you want, userxattr or no.
> >
> > Here is the alternative-whiteout approach:
> >  https://github.com/alexlarsson/linux/tree/ovl-nesting-alternative
> >
> > In it, if you have a lower dir with these files/xattrs:
> >  * lowerdir/foo - directory
> >      trusted.overlay.overlay.whiteouts
> >      user.overlay.whiteouts
> >   * lowerdir/foo/hide_file - regular file
> >      trusted.overlay.overlay.whiteout
> >      user.overlay.whiteout
> >
> > Then you will get an overlay no-userxattr mount like this:
> >  * lowerdir/foo - directory
> >      trusted.overlay.whiteouts
> >      user.overlay.whiteouts
> >   * lowerdir/foo/hide_file - regular file
> >      trusted.overlay.whiteout
> >      user.overlay.whiteout
> >
> > This can be used as a lower in any overlayfs mount you want, userxattr or no.
> >
> > I prefer the transform-to-whiteout approach for a two reasons:
> >
> > Given an existing image (say an OCI image) we can construct an
> > overlayfs mount that is not just functionally identical, but also
> > indistinguible from the expected one. I.e. if the original OCI image
> > had a chardev(0,0) we will still have one in the mount.
> >
>
> I thought that OCI image format is using a different without format
> which is converted to ovl whiteout format anyway:
> https://github.com/opencontainers/image-spec/blob/main/layer.md#whiteouts

Yeah, but those whiteouts are not the target of this work. An OCI
image is a set of tarballs, and one tarball can contain magically
marked ".wh." prefixed files for whiteouts. These are converted to
real whiteouts by docker. However, suppose the image itself contains
an overlayfs lower directory, which the app in the container wants to
use. This would look like a chardev(0,0) (not a .wh.*) in the tarball.
If this is naively extracted to disk (without escaping) then those
whiteouts will not be visible to the container app when it runs in the
container, because the outer overlayfs ate them.

Such whiteouts in OCI containers don't currently work, so you won't
find any such OCI containers in the wild. But with escapes it seems
useful. Something similar that you *will* find in the wild however, is
ostree images with whiteouts in them, and we want to keep supporting
these. See more below.

> Also, IIUC, you want this feature for mkfs.composefs, which doesn't
> have backward compat requirements and doesn't need to create
> ovl images identical to existing ones. Please correct me if I am wrong.

One of the goals is to use composefs as a backend for ostree (instead
of hardlinked trees). And ostree images with whiteouts in them are
pretty common (typically from os images with preinstalled container
images).

If mkfs.composefs converted these to xattr whiteouts it may work when
you run the containers in the image. But its not ideal if the content
of the rootfs depends on the ostree backend, and its quite possible
that some existing tooling will be confused by the new whiteouts.

> > When creating multiple lower dirs (e.g. from a multi-layered OCI
> > image) you have to carry forward xattrs on directories from the lower
> > layers to the upper, otherwise a merged directory from a higher layer
> > will overwrite the "overlay.whiteouts" xattr. This makes an otherwise
> > local operation (just escape the files in this layer) to a global one
> > that depends on all layers.
>
> I don't understand this claim. In you implementation:
> rdd->in_xwhiteouts_dir =
>    ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
> checks the "overlay.whiteouts" xattr on every layer.
>
> Checking if an entry is a whiteout only matters in the uppermost layer
> that this named entry is found.

Yeah, that is true for the whiteouts, but not the escaped whiteouts.
It's a bit confusing, so let me give an example:

Suppose I have this file structure with a 2 layer overlayfs with an xwhiteout.

├── layer1
│   └── dir
│       └── file
└── layer2
    └── dir - overlay.whiteouts
        └── file - overlay.whiteout

(At this point, it is true what you say. If a layer3/dir existed
without "overlay.whiteouts", things would still work.)

Now I want to store this structure as the first layer inside another
overlayfs by escaping the xattrs, it would look like this:

layerA
├── layer1
│   └── dir
│       └── file
└── layer2
    └── dir - overlay.overlay.whiteouts
        └── file - overlay.overlay.whiteout
layerB
└── layer2
    └── dir
        └── new-file

You'd use it like this:
 mount -t overlay -o lowerdir=layerB:layerA overlay mntAB
 mount -t overlay -o lowerdir=mntAB/layer2:mntAB/layer1 overlay mnt12

However, if you try this, you'll notice that It doesn't work, due to a
missing "overlay.whiteouts" xattr on mntAB/layer2/dir.
This is caused by the file that got added to layer2/dir in layerB.
Since we got a new merged directory (layerB/layer2/dir) it overrides
the escaped xattrs from layerA/layer2/dir.

This is not an absolute showstopper, but it would be a nice property
if escaping a layer was an isolated operation independent of the other
layers.

> My understanding is that is mkfs.composefs creates a xwhiteout file,
> it ONLY needs to locally set the overlay.whiteouts xattr on its parent
> dir. I don't understand how multi layers matter.
>
> >
> > In terms of implementation complexity I think they are very similar.
> >
>
> Sorry. I disagree. The two implementations may be on par wrt
> lines of code, but I perceive the transform-to-whiteout patch as
> harder to maintain.
>
> The fact that an xattr changes the file type seems terrifying to me.
> Imagine that upper layer has a non-empty regular file with
> overlay.whiteout xattr - this is exposed as a regular file and then
> user truncates this regular file - BOOM (turn to whiteout?).

Nod.

> I need to understand your concerns about my proposal because
> I did not get them.

Hopefully I explained it above, so we can come up with a solution.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-24 14:22                           ` Alexander Larsson
@ 2023-08-25  8:40                             ` Amir Goldstein
  2023-08-25 11:29                               ` Alexander Larsson
  0 siblings, 1 reply; 47+ messages in thread
From: Amir Goldstein @ 2023-08-25  8:40 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs

On Thu, Aug 24, 2023 at 5:23 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Thu, Aug 24, 2023 at 1:43 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Thu, Aug 24, 2023 at 12:56 PM Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Wed, Aug 23, 2023 at 5:50 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > > >
> > > > On Wed, Aug 23, 2023 at 5:52 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > >
> > > > > On Wed, 23 Aug 2023 at 16:43, Amir Goldstein <amir73il@gmail.com> wrote:
> > > > >
> > > > > > If we do this, then both overlay.whiteout and overlay.xattr_whiteouts
> > > > > > xattrs will be xattrs that the overlayfs driver never sets.
> > > > > > It's a precedent, but as long as it is properly documented and encoded
> > > > > > in fstests, I will be fine with it. Not sure about Miklos.
> > > > >
> > > > > Firstly I need to properly understand the proposal.  At this point I'm
> > > > > not sure what overlay.whiteout is supposed to mean.   Does it mean the
> > > > > same as a whiteout (chrdev(0,0))?  Or does it mean that overlayfs
> > > > > should not treat it as a whiteout, but instead transform that into a
> > > > > chrdev(0,0) for the top overlay to interpret as a whiteout?  Or
> > > > > something else?
> > > > >
> > > >
> > > > My proposal does not involve any transformation.
> > > > It is "just" to support another format for a whiteout.
> > > > Transforming a REG or FIFO real object to CHR ovl object
> > > > will be a pain IMO and I don't see why it is needed.
> > > >
> > > > Let me try again from the top:
> > > > 1. ovl_path_is_whiteout() checks if either ovl_is_whiteout() (chardev(0,0))
> > > >     or regular file with "overlay.whiteout" xattr, so ovl_lookup()
> > > > will result in
> > > >     a negative dentry if either whiteout format is found at topmost layer
> > > > 2. To optimize unneeded getxattr, "overlay.whiteout" xattr could be checked
> > > >     only in case the parent dir has xattr "overlay.xattr_whiteouts"
> > > > 3. mkfs.composefs is responsible of creating the non-chardev whiteouts
> > > >     as well as the marking the dirs that contains them with
> > > >     "overlay.xattr_whiteouts" - overlayfs itself never does that
> > > > 4. ovl_calc_d_ino() (from readdir on a merge dir) returns true if the
> > > >     the iterated dir has "overlay.xattr_whiteouts" xattr
> > > > 5. That will cause ovl_cache_update_ino() to lookup the
> > > >     *overlay dentry* that will be negative (as per rule 1 above)
> > > >     if either whiteout format is found at topmost layer and that
> > > >     will cause the readdir dirent to be marked is_whiteout and
> > > >     filtered out of readdir results
> > > >
> > > > * The trick for readdir is that the the per dirent DT_CHR optimization
> > > >   is traded off for a per parent dir optimization, but even the worst case
> > > >   where all directories have xattr_whiteouts, readdir is not more
> > > >   expensive than readdir with xino enabled, which is the default for
> > > >   several Linux distros
> > > >
> > > > Hope this is more clear?
> > >
> > > Ok, so I implemented this, both using the transforming-to-whiteout and
> > > the alternative-whiteout approach.
> > >
> > > Here is the transform-to-whiteout approach:
> > >   https://github.com/alexlarsson/linux/tree/ovl-nesting-transform
> > >
> > > In it, if you have a lower dir with these files/xattrs:
> > >  * lowerdir/foo - directory
> > >      trusted.overlay.whiteouts
> > >  * lowerdir/foo/hide_file - regular file
> > >      trusted.overlay.whiteout
> > >
> > > Then you will get an overlay no-userxattr mount like this:
> > >  * lowerdir/foo - directory
> > >  * lowerdir/foo/hide_file - chardev(0,0) file
> > >
> > > This can be used as a lower in any overlayfs mount you want, userxattr or no.
> > >
> > > Here is the alternative-whiteout approach:
> > >  https://github.com/alexlarsson/linux/tree/ovl-nesting-alternative
> > >
> > > In it, if you have a lower dir with these files/xattrs:
> > >  * lowerdir/foo - directory
> > >      trusted.overlay.overlay.whiteouts
> > >      user.overlay.whiteouts
> > >   * lowerdir/foo/hide_file - regular file
> > >      trusted.overlay.overlay.whiteout
> > >      user.overlay.whiteout
> > >
> > > Then you will get an overlay no-userxattr mount like this:
> > >  * lowerdir/foo - directory
> > >      trusted.overlay.whiteouts
> > >      user.overlay.whiteouts
> > >   * lowerdir/foo/hide_file - regular file
> > >      trusted.overlay.whiteout
> > >      user.overlay.whiteout
> > >
> > > This can be used as a lower in any overlayfs mount you want, userxattr or no.
> > >
> > > I prefer the transform-to-whiteout approach for a two reasons:
> > >
> > > Given an existing image (say an OCI image) we can construct an
> > > overlayfs mount that is not just functionally identical, but also
> > > indistinguible from the expected one. I.e. if the original OCI image
> > > had a chardev(0,0) we will still have one in the mount.
> > >
> >
> > I thought that OCI image format is using a different without format
> > which is converted to ovl whiteout format anyway:
> > https://github.com/opencontainers/image-spec/blob/main/layer.md#whiteouts
>
> Yeah, but those whiteouts are not the target of this work. An OCI
> image is a set of tarballs, and one tarball can contain magically
> marked ".wh." prefixed files for whiteouts. These are converted to
> real whiteouts by docker. However, suppose the image itself contains
> an overlayfs lower directory, which the app in the container wants to
> use. This would look like a chardev(0,0) (not a .wh.*) in the tarball.
> If this is naively extracted to disk (without escaping) then those
> whiteouts will not be visible to the container app when it runs in the
> container, because the outer overlayfs ate them.
>
> Such whiteouts in OCI containers don't currently work, so you won't
> find any such OCI containers in the wild. But with escapes it seems
> useful. Something similar that you *will* find in the wild however, is
> ostree images with whiteouts in them, and we want to keep supporting
> these. See more below.
>
> > Also, IIUC, you want this feature for mkfs.composefs, which doesn't
> > have backward compat requirements and doesn't need to create
> > ovl images identical to existing ones. Please correct me if I am wrong.
>
> One of the goals is to use composefs as a backend for ostree (instead
> of hardlinked trees). And ostree images with whiteouts in them are
> pretty common (typically from os images with preinstalled container
> images).
>
> If mkfs.composefs converted these to xattr whiteouts it may work when
> you run the containers in the image. But its not ideal if the content
> of the rootfs depends on the ostree backend, and its quite possible
> that some existing tooling will be confused by the new whiteouts.
>

I think I understand. I will try to remember how all those pieces
work together...

> > > When creating multiple lower dirs (e.g. from a multi-layered OCI
> > > image) you have to carry forward xattrs on directories from the lower
> > > layers to the upper, otherwise a merged directory from a higher layer
> > > will overwrite the "overlay.whiteouts" xattr. This makes an otherwise
> > > local operation (just escape the files in this layer) to a global one
> > > that depends on all layers.
> >
> > I don't understand this claim. In you implementation:
> > rdd->in_xwhiteouts_dir =
> >    ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
> > checks the "overlay.whiteouts" xattr on every layer.
> >
> > Checking if an entry is a whiteout only matters in the uppermost layer
> > that this named entry is found.
>
> Yeah, that is true for the whiteouts, but not the escaped whiteouts.
> It's a bit confusing, so let me give an example:
>
> Suppose I have this file structure with a 2 layer overlayfs with an xwhiteout.
>
> ├── layer1
> │   └── dir
> │       └── file
> └── layer2
>     └── dir - overlay.whiteouts
>         └── file - overlay.whiteout
>
> (At this point, it is true what you say. If a layer3/dir existed
> without "overlay.whiteouts", things would still work.)
>
> Now I want to store this structure as the first layer inside another
> overlayfs by escaping the xattrs, it would look like this:
>
> layerA
> ├── layer1
> │   └── dir
> │       └── file
> └── layer2
>     └── dir - overlay.overlay.whiteouts
>         └── file - overlay.overlay.whiteout
> layerB
> └── layer2
>     └── dir
>         └── new-file
>
> You'd use it like this:
>  mount -t overlay -o lowerdir=layerB:layerA overlay mntAB
>  mount -t overlay -o lowerdir=mntAB/layer2:mntAB/layer1 overlay mnt12

Painful example.
Next time please draw the top most layers on top and
not on bottom. I think it will be clearer this way.

>
> However, if you try this, you'll notice that It doesn't work, due to a
> missing "overlay.whiteouts" xattr on mntAB/layer2/dir.
> This is caused by the file that got added to layer2/dir in layerB.
> Since we got a new merged directory (layerB/layer2/dir) it overrides
> the escaped xattrs from layerA/layer2/dir.
>
> This is not an absolute showstopper, but it would be a nice property
> if escaping a layer was an isolated operation independent of the other
> layers.
>

I see your point.

> > My understanding is that is mkfs.composefs creates a xwhiteout file,
> > it ONLY needs to locally set the overlay.whiteouts xattr on its parent
> > dir. I don't understand how multi layers matter.
> >
> > >
> > > In terms of implementation complexity I think they are very similar.
> > >
> >
> > Sorry. I disagree. The two implementations may be on par wrt
> > lines of code, but I perceive the transform-to-whiteout patch as
> > harder to maintain.
> >
> > The fact that an xattr changes the file type seems terrifying to me.
> > Imagine that upper layer has a non-empty regular file with
> > overlay.whiteout xattr - this is exposed as a regular file and then
> > user truncates this regular file - BOOM (turn to whiteout?).
>
> Nod.
>
> > I need to understand your concerns about my proposal because
> > I did not get them.
>
> Hopefully I explained it above, so we can come up with a solution.
>

I think that the first thing to do would be to minimize the requirements.
I have lost track of the use cases and possible combinations of ostree
containers and unprivileged users.

Perhaps narrowing the requirement can simplify the design?

One option that we should consider - it may simplify the design a bit -
use an opt-in mount option to support interpreting escaping xattrs.
It may be wise to opt-in to this feature anyway - less chance for
regressions we did not think about.

This means you can use xwhiteouts with no need of marking the
containing directories. ovl_calc_d_ino() would just blindly check for
ofs->config.<something>.

Would the opt-in mount option be a problem to your use cases?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-25  8:40                             ` Amir Goldstein
@ 2023-08-25 11:29                               ` Alexander Larsson
  2023-08-25 14:39                                 ` Amir Goldstein
  0 siblings, 1 reply; 47+ messages in thread
From: Alexander Larsson @ 2023-08-25 11:29 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, linux-unionfs

On Fri, Aug 25, 2023 at 10:41 AM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Thu, Aug 24, 2023 at 5:23 PM Alexander Larsson <alexl@redhat.com> wrote:
> >
> > On Thu, Aug 24, 2023 at 1:43 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > On Thu, Aug 24, 2023 at 12:56 PM Alexander Larsson <alexl@redhat.com> wrote:
> > > >
> > > > On Wed, Aug 23, 2023 at 5:50 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > > > >
> > > > > On Wed, Aug 23, 2023 at 5:52 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > > >
> > > > > > On Wed, 23 Aug 2023 at 16:43, Amir Goldstein <amir73il@gmail.com> wrote:
> > > > > >
> > > > > > > If we do this, then both overlay.whiteout and overlay.xattr_whiteouts
> > > > > > > xattrs will be xattrs that the overlayfs driver never sets.
> > > > > > > It's a precedent, but as long as it is properly documented and encoded
> > > > > > > in fstests, I will be fine with it. Not sure about Miklos.
> > > > > >
> > > > > > Firstly I need to properly understand the proposal.  At this point I'm
> > > > > > not sure what overlay.whiteout is supposed to mean.   Does it mean the
> > > > > > same as a whiteout (chrdev(0,0))?  Or does it mean that overlayfs
> > > > > > should not treat it as a whiteout, but instead transform that into a
> > > > > > chrdev(0,0) for the top overlay to interpret as a whiteout?  Or
> > > > > > something else?
> > > > > >
> > > > >
> > > > > My proposal does not involve any transformation.
> > > > > It is "just" to support another format for a whiteout.
> > > > > Transforming a REG or FIFO real object to CHR ovl object
> > > > > will be a pain IMO and I don't see why it is needed.
> > > > >
> > > > > Let me try again from the top:
> > > > > 1. ovl_path_is_whiteout() checks if either ovl_is_whiteout() (chardev(0,0))
> > > > >     or regular file with "overlay.whiteout" xattr, so ovl_lookup()
> > > > > will result in
> > > > >     a negative dentry if either whiteout format is found at topmost layer
> > > > > 2. To optimize unneeded getxattr, "overlay.whiteout" xattr could be checked
> > > > >     only in case the parent dir has xattr "overlay.xattr_whiteouts"
> > > > > 3. mkfs.composefs is responsible of creating the non-chardev whiteouts
> > > > >     as well as the marking the dirs that contains them with
> > > > >     "overlay.xattr_whiteouts" - overlayfs itself never does that
> > > > > 4. ovl_calc_d_ino() (from readdir on a merge dir) returns true if the
> > > > >     the iterated dir has "overlay.xattr_whiteouts" xattr
> > > > > 5. That will cause ovl_cache_update_ino() to lookup the
> > > > >     *overlay dentry* that will be negative (as per rule 1 above)
> > > > >     if either whiteout format is found at topmost layer and that
> > > > >     will cause the readdir dirent to be marked is_whiteout and
> > > > >     filtered out of readdir results
> > > > >
> > > > > * The trick for readdir is that the the per dirent DT_CHR optimization
> > > > >   is traded off for a per parent dir optimization, but even the worst case
> > > > >   where all directories have xattr_whiteouts, readdir is not more
> > > > >   expensive than readdir with xino enabled, which is the default for
> > > > >   several Linux distros
> > > > >
> > > > > Hope this is more clear?
> > > >
> > > > Ok, so I implemented this, both using the transforming-to-whiteout and
> > > > the alternative-whiteout approach.
> > > >
> > > > Here is the transform-to-whiteout approach:
> > > >   https://github.com/alexlarsson/linux/tree/ovl-nesting-transform
> > > >
> > > > In it, if you have a lower dir with these files/xattrs:
> > > >  * lowerdir/foo - directory
> > > >      trusted.overlay.whiteouts
> > > >  * lowerdir/foo/hide_file - regular file
> > > >      trusted.overlay.whiteout
> > > >
> > > > Then you will get an overlay no-userxattr mount like this:
> > > >  * lowerdir/foo - directory
> > > >  * lowerdir/foo/hide_file - chardev(0,0) file
> > > >
> > > > This can be used as a lower in any overlayfs mount you want, userxattr or no.
> > > >
> > > > Here is the alternative-whiteout approach:
> > > >  https://github.com/alexlarsson/linux/tree/ovl-nesting-alternative
> > > >
> > > > In it, if you have a lower dir with these files/xattrs:
> > > >  * lowerdir/foo - directory
> > > >      trusted.overlay.overlay.whiteouts
> > > >      user.overlay.whiteouts
> > > >   * lowerdir/foo/hide_file - regular file
> > > >      trusted.overlay.overlay.whiteout
> > > >      user.overlay.whiteout
> > > >
> > > > Then you will get an overlay no-userxattr mount like this:
> > > >  * lowerdir/foo - directory
> > > >      trusted.overlay.whiteouts
> > > >      user.overlay.whiteouts
> > > >   * lowerdir/foo/hide_file - regular file
> > > >      trusted.overlay.whiteout
> > > >      user.overlay.whiteout
> > > >
> > > > This can be used as a lower in any overlayfs mount you want, userxattr or no.
> > > >
> > > > I prefer the transform-to-whiteout approach for a two reasons:
> > > >
> > > > Given an existing image (say an OCI image) we can construct an
> > > > overlayfs mount that is not just functionally identical, but also
> > > > indistinguible from the expected one. I.e. if the original OCI image
> > > > had a chardev(0,0) we will still have one in the mount.
> > > >
> > >
> > > I thought that OCI image format is using a different without format
> > > which is converted to ovl whiteout format anyway:
> > > https://github.com/opencontainers/image-spec/blob/main/layer.md#whiteouts
> >
> > Yeah, but those whiteouts are not the target of this work. An OCI
> > image is a set of tarballs, and one tarball can contain magically
> > marked ".wh." prefixed files for whiteouts. These are converted to
> > real whiteouts by docker. However, suppose the image itself contains
> > an overlayfs lower directory, which the app in the container wants to
> > use. This would look like a chardev(0,0) (not a .wh.*) in the tarball.
> > If this is naively extracted to disk (without escaping) then those
> > whiteouts will not be visible to the container app when it runs in the
> > container, because the outer overlayfs ate them.
> >
> > Such whiteouts in OCI containers don't currently work, so you won't
> > find any such OCI containers in the wild. But with escapes it seems
> > useful. Something similar that you *will* find in the wild however, is
> > ostree images with whiteouts in them, and we want to keep supporting
> > these. See more below.
> >
> > > Also, IIUC, you want this feature for mkfs.composefs, which doesn't
> > > have backward compat requirements and doesn't need to create
> > > ovl images identical to existing ones. Please correct me if I am wrong.
> >
> > One of the goals is to use composefs as a backend for ostree (instead
> > of hardlinked trees). And ostree images with whiteouts in them are
> > pretty common (typically from os images with preinstalled container
> > images).
> >
> > If mkfs.composefs converted these to xattr whiteouts it may work when
> > you run the containers in the image. But its not ideal if the content
> > of the rootfs depends on the ostree backend, and its quite possible
> > that some existing tooling will be confused by the new whiteouts.
> >
>
> I think I understand. I will try to remember how all those pieces
> work together...
>
> > > > When creating multiple lower dirs (e.g. from a multi-layered OCI
> > > > image) you have to carry forward xattrs on directories from the lower
> > > > layers to the upper, otherwise a merged directory from a higher layer
> > > > will overwrite the "overlay.whiteouts" xattr. This makes an otherwise
> > > > local operation (just escape the files in this layer) to a global one
> > > > that depends on all layers.
> > >
> > > I don't understand this claim. In you implementation:
> > > rdd->in_xwhiteouts_dir =
> > >    ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
> > > checks the "overlay.whiteouts" xattr on every layer.
> > >
> > > Checking if an entry is a whiteout only matters in the uppermost layer
> > > that this named entry is found.
> >
> > Yeah, that is true for the whiteouts, but not the escaped whiteouts.
> > It's a bit confusing, so let me give an example:
> >
> > Suppose I have this file structure with a 2 layer overlayfs with an xwhiteout.
> >
> > ├── layer1
> > │   └── dir
> > │       └── file
> > └── layer2
> >     └── dir - overlay.whiteouts
> >         └── file - overlay.whiteout
> >
> > (At this point, it is true what you say. If a layer3/dir existed
> > without "overlay.whiteouts", things would still work.)
> >
> > Now I want to store this structure as the first layer inside another
> > overlayfs by escaping the xattrs, it would look like this:
> >
> > layerA
> > ├── layer1
> > │   └── dir
> > │       └── file
> > └── layer2
> >     └── dir - overlay.overlay.whiteouts
> >         └── file - overlay.overlay.whiteout
> > layerB
> > └── layer2
> >     └── dir
> >         └── new-file
> >
> > You'd use it like this:
> >  mount -t overlay -o lowerdir=layerB:layerA overlay mntAB
> >  mount -t overlay -o lowerdir=mntAB/layer2:mntAB/layer1 overlay mnt12
>
> Painful example.
> Next time please draw the top most layers on top and
> not on bottom. I think it will be clearer this way.
>
> >
> > However, if you try this, you'll notice that It doesn't work, due to a
> > missing "overlay.whiteouts" xattr on mntAB/layer2/dir.
> > This is caused by the file that got added to layer2/dir in layerB.
> > Since we got a new merged directory (layerB/layer2/dir) it overrides
> > the escaped xattrs from layerA/layer2/dir.
> >
> > This is not an absolute showstopper, but it would be a nice property
> > if escaping a layer was an isolated operation independent of the other
> > layers.
> >
>
> I see your point.

I updated the ovl-nesting-alternative branch with an addition that
makes reading escaped whiteout dir markers take into account all
layers. With that I got the example above to work.

Those that look sane to you?

I also updated the composefs escape PR:
 https://github.com/containers/composefs/pull/175

With this I am able to take a directory containing a set of normal
overlayfs layers (including metacopy files and regular whiteouts), run
mkcomposefs on it, mount the composefs, and then mount an overlayfs
from the layer dirs in the composefs mount.

> > > My understanding is that is mkfs.composefs creates a xwhiteout file,
> > > it ONLY needs to locally set the overlay.whiteouts xattr on its parent
> > > dir. I don't understand how multi layers matter.
> > >
> > > >
> > > > In terms of implementation complexity I think they are very similar.
> > > >
> > >
> > > Sorry. I disagree. The two implementations may be on par wrt
> > > lines of code, but I perceive the transform-to-whiteout patch as
> > > harder to maintain.
> > >
> > > The fact that an xattr changes the file type seems terrifying to me.
> > > Imagine that upper layer has a non-empty regular file with
> > > overlay.whiteout xattr - this is exposed as a regular file and then
> > > user truncates this regular file - BOOM (turn to whiteout?).
> >
> > Nod.
> >
> > > I need to understand your concerns about my proposal because
> > > I did not get them.
> >
> > Hopefully I explained it above, so we can come up with a solution.
> >
>
> I think that the first thing to do would be to minimize the requirements.
> I have lost track of the use cases and possible combinations of ostree
> containers and unprivileged users.
>
> Perhaps narrowing the requirement can simplify the design?
>
> One option that we should consider - it may simplify the design a bit -
> use an opt-in mount option to support interpreting escaping xattrs.
> It may be wise to opt-in to this feature anyway - less chance for
> regressions we did not think about.

That is not really helpful to me, I need to support a composefs mount
that contains
files with overlay xattrs (to support e.g. container layers with
metacopy) as well as whiteout files (to support container layers with
whiteouts).

> This means you can use xwhiteouts with no need of marking the
> containing directories. ovl_calc_d_ino() would just blindly check for
> ofs->config.<something>.
>
> Would the opt-in mount option be a problem to your use cases?

The only option I see that could potentially useful for the composefs
usecase is to *disable* regular whiteout processing, and just expose
chardev(0,0) files as they are. Then we can use xwhiteouts for the
composefs layers and expose the chardev(0,0) files in the image as is.
Not sure this is worth it though?

At the end of the day, being able to nest overlayfs with whiteouts in
them Is a clear improvement on what we had. Then we just have to make
userspace handle the fact that the whiteout files change types when in
a composefs image.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                Red Hat, Inc
       alexl@redhat.com         alexander.larsson@gmail.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-25 11:29                               ` Alexander Larsson
@ 2023-08-25 14:39                                 ` Amir Goldstein
  0 siblings, 0 replies; 47+ messages in thread
From: Amir Goldstein @ 2023-08-25 14:39 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs

On Fri, Aug 25, 2023 at 2:30 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Fri, Aug 25, 2023 at 10:41 AM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Thu, Aug 24, 2023 at 5:23 PM Alexander Larsson <alexl@redhat.com> wrote:
> > >
> > > On Thu, Aug 24, 2023 at 1:43 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > > >
> > > > On Thu, Aug 24, 2023 at 12:56 PM Alexander Larsson <alexl@redhat.com> wrote:
> > > > >
> > > > > On Wed, Aug 23, 2023 at 5:50 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > > > > >
> > > > > > On Wed, Aug 23, 2023 at 5:52 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > > > >
> > > > > > > On Wed, 23 Aug 2023 at 16:43, Amir Goldstein <amir73il@gmail.com> wrote:
> > > > > > >
> > > > > > > > If we do this, then both overlay.whiteout and overlay.xattr_whiteouts
> > > > > > > > xattrs will be xattrs that the overlayfs driver never sets.
> > > > > > > > It's a precedent, but as long as it is properly documented and encoded
> > > > > > > > in fstests, I will be fine with it. Not sure about Miklos.
> > > > > > >
> > > > > > > Firstly I need to properly understand the proposal.  At this point I'm
> > > > > > > not sure what overlay.whiteout is supposed to mean.   Does it mean the
> > > > > > > same as a whiteout (chrdev(0,0))?  Or does it mean that overlayfs
> > > > > > > should not treat it as a whiteout, but instead transform that into a
> > > > > > > chrdev(0,0) for the top overlay to interpret as a whiteout?  Or
> > > > > > > something else?
> > > > > > >
> > > > > >
> > > > > > My proposal does not involve any transformation.
> > > > > > It is "just" to support another format for a whiteout.
> > > > > > Transforming a REG or FIFO real object to CHR ovl object
> > > > > > will be a pain IMO and I don't see why it is needed.
> > > > > >
> > > > > > Let me try again from the top:
> > > > > > 1. ovl_path_is_whiteout() checks if either ovl_is_whiteout() (chardev(0,0))
> > > > > >     or regular file with "overlay.whiteout" xattr, so ovl_lookup()
> > > > > > will result in
> > > > > >     a negative dentry if either whiteout format is found at topmost layer
> > > > > > 2. To optimize unneeded getxattr, "overlay.whiteout" xattr could be checked
> > > > > >     only in case the parent dir has xattr "overlay.xattr_whiteouts"
> > > > > > 3. mkfs.composefs is responsible of creating the non-chardev whiteouts
> > > > > >     as well as the marking the dirs that contains them with
> > > > > >     "overlay.xattr_whiteouts" - overlayfs itself never does that
> > > > > > 4. ovl_calc_d_ino() (from readdir on a merge dir) returns true if the
> > > > > >     the iterated dir has "overlay.xattr_whiteouts" xattr
> > > > > > 5. That will cause ovl_cache_update_ino() to lookup the
> > > > > >     *overlay dentry* that will be negative (as per rule 1 above)
> > > > > >     if either whiteout format is found at topmost layer and that
> > > > > >     will cause the readdir dirent to be marked is_whiteout and
> > > > > >     filtered out of readdir results
> > > > > >
> > > > > > * The trick for readdir is that the the per dirent DT_CHR optimization
> > > > > >   is traded off for a per parent dir optimization, but even the worst case
> > > > > >   where all directories have xattr_whiteouts, readdir is not more
> > > > > >   expensive than readdir with xino enabled, which is the default for
> > > > > >   several Linux distros
> > > > > >
> > > > > > Hope this is more clear?
> > > > >
> > > > > Ok, so I implemented this, both using the transforming-to-whiteout and
> > > > > the alternative-whiteout approach.
> > > > >
> > > > > Here is the transform-to-whiteout approach:
> > > > >   https://github.com/alexlarsson/linux/tree/ovl-nesting-transform
> > > > >
> > > > > In it, if you have a lower dir with these files/xattrs:
> > > > >  * lowerdir/foo - directory
> > > > >      trusted.overlay.whiteouts
> > > > >  * lowerdir/foo/hide_file - regular file
> > > > >      trusted.overlay.whiteout
> > > > >
> > > > > Then you will get an overlay no-userxattr mount like this:
> > > > >  * lowerdir/foo - directory
> > > > >  * lowerdir/foo/hide_file - chardev(0,0) file
> > > > >
> > > > > This can be used as a lower in any overlayfs mount you want, userxattr or no.
> > > > >
> > > > > Here is the alternative-whiteout approach:
> > > > >  https://github.com/alexlarsson/linux/tree/ovl-nesting-alternative
> > > > >
> > > > > In it, if you have a lower dir with these files/xattrs:
> > > > >  * lowerdir/foo - directory
> > > > >      trusted.overlay.overlay.whiteouts
> > > > >      user.overlay.whiteouts
> > > > >   * lowerdir/foo/hide_file - regular file
> > > > >      trusted.overlay.overlay.whiteout
> > > > >      user.overlay.whiteout
> > > > >
> > > > > Then you will get an overlay no-userxattr mount like this:
> > > > >  * lowerdir/foo - directory
> > > > >      trusted.overlay.whiteouts
> > > > >      user.overlay.whiteouts
> > > > >   * lowerdir/foo/hide_file - regular file
> > > > >      trusted.overlay.whiteout
> > > > >      user.overlay.whiteout
> > > > >
> > > > > This can be used as a lower in any overlayfs mount you want, userxattr or no.
> > > > >
> > > > > I prefer the transform-to-whiteout approach for a two reasons:
> > > > >
> > > > > Given an existing image (say an OCI image) we can construct an
> > > > > overlayfs mount that is not just functionally identical, but also
> > > > > indistinguible from the expected one. I.e. if the original OCI image
> > > > > had a chardev(0,0) we will still have one in the mount.
> > > > >
> > > >
> > > > I thought that OCI image format is using a different without format
> > > > which is converted to ovl whiteout format anyway:
> > > > https://github.com/opencontainers/image-spec/blob/main/layer.md#whiteouts
> > >
> > > Yeah, but those whiteouts are not the target of this work. An OCI
> > > image is a set of tarballs, and one tarball can contain magically
> > > marked ".wh." prefixed files for whiteouts. These are converted to
> > > real whiteouts by docker. However, suppose the image itself contains
> > > an overlayfs lower directory, which the app in the container wants to
> > > use. This would look like a chardev(0,0) (not a .wh.*) in the tarball.
> > > If this is naively extracted to disk (without escaping) then those
> > > whiteouts will not be visible to the container app when it runs in the
> > > container, because the outer overlayfs ate them.
> > >
> > > Such whiteouts in OCI containers don't currently work, so you won't
> > > find any such OCI containers in the wild. But with escapes it seems
> > > useful. Something similar that you *will* find in the wild however, is
> > > ostree images with whiteouts in them, and we want to keep supporting
> > > these. See more below.
> > >
> > > > Also, IIUC, you want this feature for mkfs.composefs, which doesn't
> > > > have backward compat requirements and doesn't need to create
> > > > ovl images identical to existing ones. Please correct me if I am wrong.
> > >
> > > One of the goals is to use composefs as a backend for ostree (instead
> > > of hardlinked trees). And ostree images with whiteouts in them are
> > > pretty common (typically from os images with preinstalled container
> > > images).
> > >
> > > If mkfs.composefs converted these to xattr whiteouts it may work when
> > > you run the containers in the image. But its not ideal if the content
> > > of the rootfs depends on the ostree backend, and its quite possible
> > > that some existing tooling will be confused by the new whiteouts.
> > >
> >
> > I think I understand. I will try to remember how all those pieces
> > work together...
> >
> > > > > When creating multiple lower dirs (e.g. from a multi-layered OCI
> > > > > image) you have to carry forward xattrs on directories from the lower
> > > > > layers to the upper, otherwise a merged directory from a higher layer
> > > > > will overwrite the "overlay.whiteouts" xattr. This makes an otherwise
> > > > > local operation (just escape the files in this layer) to a global one
> > > > > that depends on all layers.
> > > >
> > > > I don't understand this claim. In you implementation:
> > > > rdd->in_xwhiteouts_dir =
> > > >    ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
> > > > checks the "overlay.whiteouts" xattr on every layer.
> > > >
> > > > Checking if an entry is a whiteout only matters in the uppermost layer
> > > > that this named entry is found.
> > >
> > > Yeah, that is true for the whiteouts, but not the escaped whiteouts.
> > > It's a bit confusing, so let me give an example:
> > >
> > > Suppose I have this file structure with a 2 layer overlayfs with an xwhiteout.
> > >
> > > ├── layer1
> > > │   └── dir
> > > │       └── file
> > > └── layer2
> > >     └── dir - overlay.whiteouts
> > >         └── file - overlay.whiteout
> > >
> > > (At this point, it is true what you say. If a layer3/dir existed
> > > without "overlay.whiteouts", things would still work.)
> > >
> > > Now I want to store this structure as the first layer inside another
> > > overlayfs by escaping the xattrs, it would look like this:
> > >
> > > layerA
> > > ├── layer1
> > > │   └── dir
> > > │       └── file
> > > └── layer2
> > >     └── dir - overlay.overlay.whiteouts
> > >         └── file - overlay.overlay.whiteout
> > > layerB
> > > └── layer2
> > >     └── dir
> > >         └── new-file
> > >
> > > You'd use it like this:
> > >  mount -t overlay -o lowerdir=layerB:layerA overlay mntAB
> > >  mount -t overlay -o lowerdir=mntAB/layer2:mntAB/layer1 overlay mnt12
> >
> > Painful example.
> > Next time please draw the top most layers on top and
> > not on bottom. I think it will be clearer this way.
> >
> > >
> > > However, if you try this, you'll notice that It doesn't work, due to a
> > > missing "overlay.whiteouts" xattr on mntAB/layer2/dir.
> > > This is caused by the file that got added to layer2/dir in layerB.
> > > Since we got a new merged directory (layerB/layer2/dir) it overrides
> > > the escaped xattrs from layerA/layer2/dir.
> > >
> > > This is not an absolute showstopper, but it would be a nice property
> > > if escaping a layer was an isolated operation independent of the other
> > > layers.
> > >
> >
> > I see your point.
>
> I updated the ovl-nesting-alternative branch with an addition that
> makes reading escaped whiteout dir markers take into account all
> layers. With that I got the example above to work.
>
> Those that look sane to you?
>

I can live with that slightly odd code as it is harmless for
anything other than this corner use case, just please add it
in a separate commit with some drawing like above to
explain the problem that it is solving.

Let's see what Miklos has to say.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
  2023-08-22 17:29                     ` Amir Goldstein
@ 2023-09-08  2:21                       ` Gao Xiang
       [not found]                         ` <CAL7ro1EsjzNeYAbjPN3HnB7Sq4D48my3PGhPG5L+4DTXzA9xFw@mail.gmail.com>
  0 siblings, 1 reply; 47+ messages in thread
From: Gao Xiang @ 2023-09-08  2:21 UTC (permalink / raw)
  To: Amir Goldstein, Miklos Szeredi; +Cc: Alexander Larsson, linux-unionfs

Hi folks,

On 2023/8/23 01:29, Amir Goldstein wrote:
> On Tue, Aug 22, 2023 at 7:19 PM Miklos Szeredi <miklos@szeredi.hu> wrote:

...

>>
> 
> I proposed to look at the two features independently:
> 1. Nesting of overlayfs xattrs (patch 3/5)
> 2. Alternative format for whiteout (overlay.whiteout) that can be used
>     by container tools converting OCI/tar images to overlayfs images
> 
> Together, they provide a solution to Alexander's use case.
> 
> IIUC, the way this is intended to work is that mkfs.composefs
> is running inside a container, whose work directory is overlayfs
> and it composes some lower layers on that host mounted overlayfs.
> 
> mkfs.composefs composes layers with overlay.{metacopy,whiteout,redirect}
> xattrs (up to here it is standard mkfs.composefs) and because those layers
> are stored in overlayfs, the xattrs are stored in the host fs as
> overlay.overlay.*.
> 
> I hope I got the use case correctly?
Sorry for the dumb questions below.  I'm interested in the use cases:
after checking the previous github issue and emails (sorry if I'm
still missing something), I'm curious about this too.

I totally understand how it plans to work and how it works (by using
escape xattr prefixes) but I'm not sure if I'm quite understand the
original issue:

Do composefs use cases store overlayfs xattrs in the meta-only layer?
if so, such layer is actually hand-crafted by mkfs.  Why do we need
a way to keep escape xattrs on the underlay overlayfs?  Does the
other layer are data-only layers (do we keep some overlay xattrs in
these data-only layers)?

Thanks,
Gao Xiang

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs
       [not found]                         ` <CAL7ro1EsjzNeYAbjPN3HnB7Sq4D48my3PGhPG5L+4DTXzA9xFw@mail.gmail.com>
@ 2023-09-08  8:44                           ` Gao Xiang
  0 siblings, 0 replies; 47+ messages in thread
From: Gao Xiang @ 2023-09-08  8:44 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Amir Goldstein, Miklos Szeredi, linux-unionfs

Hi Alexander,

On 2023/9/8 16:30, Alexander Larsson wrote:
> 
> 
> On Fri, Sep 8, 2023 at 4:21 AM Gao Xiang <hsiangkao@linux.alibaba.com <mailto:hsiangkao@linux.alibaba.com>> wrote:
> 

..

>      > I hope I got the use case correctly?
>     Sorry for the dumb questions below.  I'm interested in the use cases:
>     after checking the previous github issue and emails (sorry if I'm
>     still missing something), I'm curious about this too.
> 
>     I totally understand how it plans to work and how it works (by using
>     escape xattr prefixes) but I'm not sure if I'm quite understand the
>     original issue:
> 
>     Do composefs use cases store overlayfs xattrs in the meta-only layer?
>     if so, such layer is actually hand-crafted by mkfs.  Why do we need
>     a way to keep escape xattrs on the underlay overlayfs?  Does the
>     other layer are data-only layers (do we keep some overlay xattrs in
>     these data-only layers)?
> 
> 
> Here is the problem statement:
> 
> I use composefs for my rootfs. I want to be able to store any kind of files in it and have them be visible in the final rootfs. In particular I want it to contain a whiteout file, because I want to mount an additional overlayfs where the lowerdir is stored on the rootfs (i.e. in the composefs mount).
> 
> The naive way to accomplish this is to just put a chardev(0,0) file in the erofs image that is the metadata layer in the rootfs overlayfs. I mean, that is what we do with all other file types.
> 
> However, if you were to do this, then the overlayfs that composefs uses will interpret the whiteout as masking out a file from the data-only layer in the composefs mount. This means the whiteout file we wanted is not visible in the final rootfs.

Thanks for your explanation.  That is helpful for me to
know what was happening here.

Okay, I got the point, so basically I think the original
use case is to have a way to "make such whiteouts in the
meta-only layer that overlayfs doesn't interpret/drop in
the composefs mount" (yes, a special whiteout is a
workable solution. )

Personally, there could be several ways to resolve this.
I have no more questions, thanks for the reply :)

Thanks,
Gao Xiang

> 
> -- 
> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
>   Alexander Larsson                                Red Hat, Inc
> alexl@redhat.com <mailto:alexl@redhat.com>alexander.larsson@gmail.com <mailto:alexander.larsson@gmail.com>

^ permalink raw reply	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2023-09-08  8:44 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-08-17 11:05 [PATCH v2 0/5] Support nested overlayfs mounts Alexander Larsson
2023-08-17 11:05 ` [PATCH v2 1/5] ovl: Move xattr support to new xattrs.c file Alexander Larsson
2023-08-17 12:48   ` Amir Goldstein
2023-08-17 11:05 ` [PATCH v2 2/5] ovl: Add OVL_XATTR_TRUSTED/USER_PREFIX_LEN macros Alexander Larsson
2023-08-17 12:48   ` Amir Goldstein
2023-08-17 11:05 ` [PATCH v2 3/5] ovl: Support escaped overlay.* xattrs Alexander Larsson
2023-08-17 14:31   ` Amir Goldstein
2023-08-17 15:00     ` Alexander Larsson
2023-08-17 11:05 ` [PATCH v2 4/5] ovl: Support creation of whiteout files on overlayfs Alexander Larsson
2023-08-17 13:40   ` Amir Goldstein
2023-08-21 10:59   ` Miklos Szeredi
2023-08-21 12:55     ` Alexander Larsson
2023-08-21 13:25       ` Amir Goldstein
2023-08-21 15:31         ` Alexander Larsson
2023-08-21 15:40           ` Miklos Szeredi
2023-08-21 20:07             ` Alexander Larsson
2023-08-22 13:22     ` Alexander Larsson
2023-08-22 13:56       ` Miklos Szeredi
2023-08-22 14:25         ` Alexander Larsson
2023-08-22 14:36           ` Alexander Larsson
2023-08-22 15:02             ` Miklos Szeredi
2023-08-22 15:31               ` Alexander Larsson
2023-08-22 15:30             ` Amir Goldstein
2023-08-22 15:43               ` Alexander Larsson
2023-08-22 15:57                 ` Amir Goldstein
2023-08-22 16:18                   ` Miklos Szeredi
2023-08-22 17:29                     ` Amir Goldstein
2023-09-08  2:21                       ` Gao Xiang
     [not found]                         ` <CAL7ro1EsjzNeYAbjPN3HnB7Sq4D48my3PGhPG5L+4DTXzA9xFw@mail.gmail.com>
2023-09-08  8:44                           ` Gao Xiang
2023-08-23  8:51                   ` Alexander Larsson
2023-08-23 13:13               ` Alexander Larsson
2023-08-23 13:21                 ` Alexander Larsson
2023-08-23 14:42                 ` Amir Goldstein
2023-08-23 14:52                   ` Miklos Szeredi
2023-08-23 15:02                     ` Alexander Larsson
2023-08-23 15:50                     ` Amir Goldstein
2023-08-24  9:56                       ` Alexander Larsson
2023-08-24 11:43                         ` Amir Goldstein
2023-08-24 14:22                           ` Alexander Larsson
2023-08-25  8:40                             ` Amir Goldstein
2023-08-25 11:29                               ` Alexander Larsson
2023-08-25 14:39                                 ` Amir Goldstein
2023-08-23 14:50                 ` Alexander Larsson
2023-08-17 11:05 ` [PATCH v2 5/5] ovl: Add documentation on nesting of overlayfs mounts Alexander Larsson
2023-08-17 13:31   ` Amir Goldstein
2023-08-17 13:59     ` Alexander Larsson
2023-08-17 14:45 ` [PATCH v2 0/5] Support nested " Amir Goldstein

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox