[PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict"

public inbox for linux-unionfs@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict"
@ 2024-07-18  3:43 Fei Lv
  2024-07-19  7:23 ` Amir Goldstein
  0 siblings, 1 reply; 5+ messages in thread
From: Fei Lv @ 2024-07-18  3:43 UTC (permalink / raw)
  To: miklos, amir73il, linux-unionfs, linux-kernel, lianghuxu, feilv

If a directory only exist in low layer, create a new file in it trigger
directory copy-up. Permission lost of the new directory in upper layer
was observed during power-cut stress test.

Fix by adding new mount opion "upsync=strict", make sure data/metadata of
copied up directory written to disk before renaming from tmp to final
destination.

Signed-off-by: Fei Lv <feilv@asrmicro.com>
---
OPT_sync changed to OPT_upsync since mount option "sync" already used.

 fs/overlayfs/copy_up.c   | 21 +++++++++++++++++++++
 fs/overlayfs/ovl_entry.h | 20 ++++++++++++++++++--
 fs/overlayfs/params.c    | 33 +++++++++++++++++++++++++++++----
 fs/overlayfs/super.c     |  2 +-
 4 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index a5ef2005a2cc..b6f021ad7a43 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -243,6 +243,21 @@ static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
 	return 0;
 }
 
+static int ovl_copy_up_sync(struct path *path)
+{
+	struct file *new_file;
+	int err;
+
+	new_file = ovl_path_open(path, O_LARGEFILE | O_WRONLY);
+	if (IS_ERR(new_file))
+		return PTR_ERR(new_file);
+
+	err = vfs_fsync(new_file, 0);
+	fput(new_file);
+
+	return err;
+}
+
 static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
 			    struct file *new_file, loff_t len)
 {
@@ -701,6 +716,9 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
 		err = ovl_set_attr(ofs, temp, &c->stat);
 	inode_unlock(temp->d_inode);
 
+	if (!err && ovl_should_sync_strict(ofs))
+		err = ovl_copy_up_sync(&upperpath);
+
 	return err;
 }
 
@@ -1104,6 +1122,9 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
 	ovl_clear_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
 	ovl_clear_flag(OVL_VERIFIED_DIGEST, d_inode(c->dentry));
 	ovl_set_upperdata(d_inode(c->dentry));
+
+	if (!err && ovl_should_sync_strict(ofs))
+		err = ovl_copy_up_sync(&upperpath);
 out_free:
 	kfree(capability);
 out:
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index cb449ab310a7..4592e6f7dcf7 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -5,6 +5,12 @@
  * Copyright (C) 2016 Red Hat, Inc.
  */
 
+enum {
+	OVL_SYNC_DATA,
+	OVL_SYNC_STRICT,
+	OVL_SYNC_OFF,
+};
+
 struct ovl_config {
 	char *upperdir;
 	char *workdir;
@@ -18,7 +24,7 @@ struct ovl_config {
 	int xino;
 	bool metacopy;
 	bool userxattr;
-	bool ovl_volatile;
+	int sync_mode;
 };
 
 struct ovl_sb {
@@ -120,7 +126,17 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb)
 
 static inline bool ovl_should_sync(struct ovl_fs *ofs)
 {
-	return !ofs->config.ovl_volatile;
+	return ofs->config.sync_mode == OVL_SYNC_DATA;
+}
+
+static inline bool ovl_should_sync_strict(struct ovl_fs *ofs)
+{
+	return ofs->config.sync_mode == OVL_SYNC_STRICT;
+}
+
+static inline bool ovl_is_volatile(struct ovl_config *config)
+{
+	return config->sync_mode == OVL_SYNC_OFF;
 }
 
 static inline unsigned int ovl_numlower(struct ovl_entry *oe)
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index 4860fcc4611b..5d5538dd3de7 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -58,6 +58,7 @@ enum ovl_opt {
 	Opt_xino,
 	Opt_metacopy,
 	Opt_verity,
+	Opt_upsync,
 	Opt_volatile,
 };
 
@@ -139,6 +140,23 @@ static int ovl_verity_mode_def(void)
 	return OVL_VERITY_OFF;
 }
 
+static const struct constant_table ovl_parameter_upsync[] = {
+	{ "data",	OVL_SYNC_DATA      },
+	{ "strict",	OVL_SYNC_STRICT    },
+	{ "off",	OVL_SYNC_OFF       },
+	{}
+};
+
+static const char *ovl_upsync_mode(struct ovl_config *config)
+{
+	return ovl_parameter_upsync[config->sync_mode].name;
+}
+
+static int ovl_upsync_mode_def(void)
+{
+	return OVL_SYNC_DATA;
+}
+
 const struct fs_parameter_spec ovl_parameter_spec[] = {
 	fsparam_string_empty("lowerdir",    Opt_lowerdir),
 	fsparam_string("lowerdir+",         Opt_lowerdir_add),
@@ -154,6 +172,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = {
 	fsparam_enum("xino",                Opt_xino, ovl_parameter_xino),
 	fsparam_enum("metacopy",            Opt_metacopy, ovl_parameter_bool),
 	fsparam_enum("verity",              Opt_verity, ovl_parameter_verity),
+	fsparam_enum("upsync",              Opt_upsync, ovl_parameter_upsync),
 	fsparam_flag("volatile",            Opt_volatile),
 	{}
 };
@@ -617,8 +636,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	case Opt_verity:
 		config->verity_mode = result.uint_32;
 		break;
+	case Opt_upsync:
+		config->sync_mode = result.uint_32;
+		break;
 	case Opt_volatile:
-		config->ovl_volatile = true;
+		config->sync_mode = OVL_SYNC_OFF;
 		break;
 	case Opt_userxattr:
 		config->userxattr = true;
@@ -802,9 +824,9 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
 		config->index = false;
 	}
 
-	if (!config->upperdir && config->ovl_volatile) {
+	if (!config->upperdir && ovl_is_volatile(config)) {
 		pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
-		config->ovl_volatile = false;
+		config->sync_mode = ovl_upsync_mode_def();
 	}
 
 	if (!config->upperdir && config->uuid == OVL_UUID_ON) {
@@ -997,8 +1019,11 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 	if (ofs->config.metacopy != ovl_metacopy_def)
 		seq_printf(m, ",metacopy=%s",
 			   ofs->config.metacopy ? "on" : "off");
-	if (ofs->config.ovl_volatile)
+	if (ovl_is_volatile(&ofs->config))
 		seq_puts(m, ",volatile");
+	else if (ofs->config.sync_mode != ovl_upsync_mode_def())
+		seq_printf(m, ",upsync=%s",
+			   ovl_upsync_mode(&ofs->config));
 	if (ofs->config.userxattr)
 		seq_puts(m, ",userxattr");
 	if (ofs->config.verity_mode != ovl_verity_mode_def())
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 06a231970cb5..824cbcf40523 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -750,7 +750,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
 	 * For volatile mount, create a incompat/volatile/dirty file to keep
 	 * track of it.
 	 */
-	if (ofs->config.ovl_volatile) {
+	if (ovl_is_volatile(&ofs->config)) {
 		err = ovl_create_volatile_dirty(ofs);
 		if (err < 0) {
 			pr_err("Failed to create volatile/dirty file.\n");

base-commit: 0c3836482481200ead7b416ca80c68a29cfdaabd
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict"
  2024-07-18  3:43 [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict" Fei Lv
@ 2024-07-19  7:23 ` Amir Goldstein
  2024-07-19  9:55   ` 答复: " Lv Fei（吕飞）
  0 siblings, 1 reply; 5+ messages in thread
From: Amir Goldstein @ 2024-07-19  7:23 UTC (permalink / raw)
  To: Fei Lv; +Cc: miklos, linux-unionfs, linux-kernel, lianghuxu

On Thu, Jul 18, 2024 at 6:43 AM Fei Lv <feilv@asrmicro.com> wrote:
>
> If a directory only exist in low layer, create a new file in it trigger
> directory copy-up. Permission lost of the new directory in upper layer
> was observed during power-cut stress test.

You should specify that this outcome happens on very specific upper fs
(i.e. ubifs) which does not enforce ordering on storing of metadata
changes.

>
> Fix by adding new mount opion "upsync=strict", make sure data/metadata of
> copied up directory written to disk before renaming from tmp to final
> destination.
>
> Signed-off-by: Fei Lv <feilv@asrmicro.com>
> ---
> OPT_sync changed to OPT_upsync since mount option "sync" already used.

I see. I don't like the name "upsync" so much, it has other meanings
how about using the option "fsync"?

Here is a suggested documentation (which should be accompanied to any patch)

diff --git a/Documentation/filesystems/overlayfs.rst
b/Documentation/filesystems/overlayfs.rst
index 165514401441..f8183ddf8c4d 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -742,6 +742,42 @@ controlled by the "uuid" mount option, which
supports these values:
     mounted with "uuid=on".


+Durability and copy up
+----------------------
+
+The fsync(2) and fdatasync(2) system calls ensure that the metadata and data
+of a file, respectively, are safely written to the backing storage, which is
+expected to guarantee the existence of the information post system crash.
+
+Without the fdatasync(2) call, there is no guarantee that the observed data
+after a system crash will be either the old or the new data, but in practice,
+the observed data after crash is often the old or new data or a mix of both.
+
+When overlayfs file is modified for the first time, copy up will create a copy
+of the lower file and its parent directories in the upper layer.  In case of a
+system crash, if fdatasync(2) was not called after the modification, the upper
+file could end up with no data at all (i.e. zeros), which would be an unusual
+outcome.  To avoid this experience, overlayfs calls fsync(2) on the upper file
+before completing the copy up with rename(2) to make the copy up "atomic".
+
+Depending on the backing filesystem (e.g. ubifs), fsync(2) before rename(2) may
+not be enough to provide the "atomic" copy up behavior and fsync(2) on the
+copied up parent directories is required as well.
+
+Overlayfs can be tuned to prefer performance or durability when storing to the
+underlying upper layer.  This is controlled by the "fsync" mount option,
+which supports these values:
+
+- "ordered": (default)
+    Call fsync(2) on upper file before completion of copy up.
+- "strict":
+    Call fsync(2) on upper file and directories before completion of copy up.
+- "volatile": [*]
+    Prefer performance over durability (see `Volatile mount`_)
+
+[*] The mount option "volatile" is an alias to "fsync=volatile".
+
+
 Volatile mount
 --------------

>
>  fs/overlayfs/copy_up.c   | 21 +++++++++++++++++++++
>  fs/overlayfs/ovl_entry.h | 20 ++++++++++++++++++--
>  fs/overlayfs/params.c    | 33 +++++++++++++++++++++++++++++----
>  fs/overlayfs/super.c     |  2 +-
>  4 files changed, 69 insertions(+), 7 deletions(-)
>
> diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> index a5ef2005a2cc..b6f021ad7a43 100644
> --- a/fs/overlayfs/copy_up.c
> +++ b/fs/overlayfs/copy_up.c
> @@ -243,6 +243,21 @@ static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
>         return 0;
>  }
>
> +static int ovl_copy_up_sync(struct path *path)
> +{
> +       struct file *new_file;
> +       int err;
> +
> +       new_file = ovl_path_open(path, O_LARGEFILE | O_WRONLY);

I don't think any of those O_ flags are needed for fsync.
Can a directory be opened O_WRONLY???

> +       if (IS_ERR(new_file))
> +               return PTR_ERR(new_file);
> +
> +       err = vfs_fsync(new_file, 0);
> +       fput(new_file);
> +
> +       return err;
> +}
> +
>  static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
>                             struct file *new_file, loff_t len)
>  {
> @@ -701,6 +716,9 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
>                 err = ovl_set_attr(ofs, temp, &c->stat);
>         inode_unlock(temp->d_inode);
>
> +       if (!err && ovl_should_sync_strict(ofs))
> +               err = ovl_copy_up_sync(&upperpath);
> +
>         return err;
>  }
>
> @@ -1104,6 +1122,9 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
>         ovl_clear_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
>         ovl_clear_flag(OVL_VERIFIED_DIGEST, d_inode(c->dentry));
>         ovl_set_upperdata(d_inode(c->dentry));
> +
> +       if (!err && ovl_should_sync_strict(ofs))
> +               err = ovl_copy_up_sync(&upperpath);

fsync was probably already called in ovl_copy_up_file()
making this call redundant and fsync of the removal
of metacopy xattr does not add any safety.

>  out_free:
>         kfree(capability);
>  out:
> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> index cb449ab310a7..4592e6f7dcf7 100644
> --- a/fs/overlayfs/ovl_entry.h
> +++ b/fs/overlayfs/ovl_entry.h
> @@ -5,6 +5,12 @@
>   * Copyright (C) 2016 Red Hat, Inc.
>   */
>
> +enum {
> +       OVL_SYNC_DATA,
> +       OVL_SYNC_STRICT,
> +       OVL_SYNC_OFF,
> +};
> +
>  struct ovl_config {
>         char *upperdir;
>         char *workdir;
> @@ -18,7 +24,7 @@ struct ovl_config {
>         int xino;
>         bool metacopy;
>         bool userxattr;
> -       bool ovl_volatile;
> +       int sync_mode;
>  };
>
>  struct ovl_sb {
> @@ -120,7 +126,17 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb)
>
>  static inline bool ovl_should_sync(struct ovl_fs *ofs)
>  {
> -       return !ofs->config.ovl_volatile;
> +       return ofs->config.sync_mode == OVL_SYNC_DATA;

    return ofs->config.sync_mode != OVL_SYNC_OFF;
or
    return ofs->config.sync_mode != OVL_FSYNC_VOLATILE;

> +}
> +
> +static inline bool ovl_should_sync_strict(struct ovl_fs *ofs)
> +{
> +       return ofs->config.sync_mode == OVL_SYNC_STRICT;
> +}
> +
> +static inline bool ovl_is_volatile(struct ovl_config *config)
> +{
> +       return config->sync_mode == OVL_SYNC_OFF;
>  }
>
>  static inline unsigned int ovl_numlower(struct ovl_entry *oe)
> diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
> index 4860fcc4611b..5d5538dd3de7 100644
> --- a/fs/overlayfs/params.c
> +++ b/fs/overlayfs/params.c
> @@ -58,6 +58,7 @@ enum ovl_opt {
>         Opt_xino,
>         Opt_metacopy,
>         Opt_verity,
> +       Opt_upsync,
>         Opt_volatile,
>  };
>
> @@ -139,6 +140,23 @@ static int ovl_verity_mode_def(void)
>         return OVL_VERITY_OFF;
>  }
>
> +static const struct constant_table ovl_parameter_upsync[] = {
> +       { "data",       OVL_SYNC_DATA      },
> +       { "strict",     OVL_SYNC_STRICT    },
> +       { "off",        OVL_SYNC_OFF       },
> +       {}
> +};
> +
> +static const char *ovl_upsync_mode(struct ovl_config *config)
> +{
> +       return ovl_parameter_upsync[config->sync_mode].name;
> +}
> +
> +static int ovl_upsync_mode_def(void)
> +{
> +       return OVL_SYNC_DATA;
> +}
> +
>  const struct fs_parameter_spec ovl_parameter_spec[] = {
>         fsparam_string_empty("lowerdir",    Opt_lowerdir),
>         fsparam_string("lowerdir+",         Opt_lowerdir_add),
> @@ -154,6 +172,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = {
>         fsparam_enum("xino",                Opt_xino, ovl_parameter_xino),
>         fsparam_enum("metacopy",            Opt_metacopy, ovl_parameter_bool),
>         fsparam_enum("verity",              Opt_verity, ovl_parameter_verity),
> +       fsparam_enum("upsync",              Opt_upsync, ovl_parameter_upsync),
>         fsparam_flag("volatile",            Opt_volatile),
>         {}
>  };
> @@ -617,8 +636,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
>         case Opt_verity:
>                 config->verity_mode = result.uint_32;
>                 break;
> +       case Opt_upsync:
> +               config->sync_mode = result.uint_32;
> +               break;
>         case Opt_volatile:
> -               config->ovl_volatile = true;
> +               config->sync_mode = OVL_SYNC_OFF;
>                 break;
>         case Opt_userxattr:
>                 config->userxattr = true;
> @@ -802,9 +824,9 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
>                 config->index = false;
>         }
>
> -       if (!config->upperdir && config->ovl_volatile) {
> +       if (!config->upperdir && ovl_is_volatile(config)) {
>                 pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");

This message would be confusing if mount option is "syncup=off"
but if the option is "fsync=volatile" I think the message can stay as it is.

Thanks,
Amir.

> -               config->ovl_volatile = false;
> +               config->sync_mode = ovl_upsync_mode_def();
>         }
>
>         if (!config->upperdir && config->uuid == OVL_UUID_ON) {
> @@ -997,8 +1019,11 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
>         if (ofs->config.metacopy != ovl_metacopy_def)
>                 seq_printf(m, ",metacopy=%s",
>                            ofs->config.metacopy ? "on" : "off");
> -       if (ofs->config.ovl_volatile)
> +       if (ovl_is_volatile(&ofs->config))
>                 seq_puts(m, ",volatile");
> +       else if (ofs->config.sync_mode != ovl_upsync_mode_def())
> +               seq_printf(m, ",upsync=%s",
> +                          ovl_upsync_mode(&ofs->config));
>         if (ofs->config.userxattr)
>                 seq_puts(m, ",userxattr");
>         if (ofs->config.verity_mode != ovl_verity_mode_def())
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index 06a231970cb5..824cbcf40523 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -750,7 +750,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
>          * For volatile mount, create a incompat/volatile/dirty file to keep
>          * track of it.
>          */
> -       if (ofs->config.ovl_volatile) {
> +       if (ovl_is_volatile(&ofs->config)) {
>                 err = ovl_create_volatile_dirty(ofs);
>                 if (err < 0) {
>                         pr_err("Failed to create volatile/dirty file.\n");
>
> base-commit: 0c3836482481200ead7b416ca80c68a29cfdaabd
> --
> 2.45.2
>

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* 答复: [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict"
  2024-07-19  7:23 ` Amir Goldstein
@ 2024-07-19  9:55   ` Lv Fei（吕飞）
  2024-07-20  7:29     ` Amir Goldstein
  0 siblings, 1 reply; 5+ messages in thread
From: Lv Fei（吕飞） @ 2024-07-19  9:55 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: miklos@szeredi.hu, linux-unionfs@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Xu Lianghu（徐良虎）


> 
> -----邮件原件-----
> 发件人: Amir Goldstein [mailto:amir73il@gmail.com] 
> 发送时间: 2024年7月19日 15:24
> 收件人: Lv Fei（吕飞） <feilv@asrmicro.com>
> 抄送: miklos@szeredi.hu; linux-unionfs@vger.kernel.org; linux-kernel@vger.kernel.org; Xu Lianghu（徐良虎） > <lianghuxu@asrmicro.com>
> 主题: Re: [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict"
> 
> On Thu, Jul 18, 2024 at 6:43 AM Fei Lv <feilv@asrmicro.com> wrote:
> >
> > If a directory only exist in low layer, create a new file in it 
> > trigger directory copy-up. Permission lost of the new directory in 
> > upper layer was observed during power-cut stress test.
> 
> You should specify that this outcome happens on very specific upper fs (i.e. ubifs) which does not enforce ordering on storing of metadata changes.

OK.

> 
> >
> > Fix by adding new mount opion "upsync=strict", make sure data/metadata 
> > of copied up directory written to disk before renaming from tmp to 
> > final destination.
> >
> > Signed-off-by: Fei Lv <feilv@asrmicro.com>
> > ---
> > OPT_sync changed to OPT_upsync since mount option "sync" already used.
> 
> I see. I don't like the name "upsync" so much, it has other meanings how about using the option "fsync"?

OK.

> 
> Here is a suggested documentation (which should be accompanied to any patch)

OK.

> 
> diff --git a/Documentation/filesystems/overlayfs.rst
> b/Documentation/filesystems/overlayfs.rst
> index 165514401441..f8183ddf8c4d 100644
> --- a/Documentation/filesystems/overlayfs.rst
> +++ b/Documentation/filesystems/overlayfs.rst
> @@ -742,6 +742,42 @@ controlled by the "uuid" mount option, which supports these values:
>      mounted with "uuid=on".
> 
> 
> +Durability and copy up
> +----------------------
> +
> +The fsync(2) and fdatasync(2) system calls ensure that the metadata and 
> +data of a file, respectively, are safely written to the backing 
> +storage, which is expected to guarantee the existence of the information post system crash.
> +
> +Without the fdatasync(2) call, there is no guarantee that the observed 
> +data after a system crash will be either the old or the new data, but 
> +in practice, the observed data after crash is often the old or new data or a mix of both.
> +
> +When overlayfs file is modified for the first time, copy up will create 
> +a copy of the lower file and its parent directories in the upper layer.  
> +In case of a system crash, if fdatasync(2) was not called after the 
> +modification, the upper file could end up with no data at all (i.e. 
> +zeros), which would be an unusual outcome.  To avoid this experience, 
> +overlayfs calls fsync(2) on the upper file before completing the copy up with rename(2) to make the copy > up "atomic".
> +
> +Depending on the backing filesystem (e.g. ubifs), fsync(2) before 
> +rename(2) may not be enough to provide the "atomic" copy up behavior 
> +and fsync(2) on the copied up parent directories is required as well.
> +
> +Overlayfs can be tuned to prefer performance or durability when storing 
> +to the underlying upper layer.  This is controlled by the "fsync" mount 
> +option, which supports these values:
> +
> +- "ordered": (default)
> +    Call fsync(2) on upper file before completion of copy up.
> +- "strict":
> +    Call fsync(2) on upper file and directories before completion of copy up.
> +- "volatile": [*]
> +    Prefer performance over durability (see `Volatile mount`_)
> +
> +[*] The mount option "volatile" is an alias to "fsync=volatile".
> +
> +
>  Volatile mount
>  --------------
> 
> >
> >  fs/overlayfs/copy_up.c   | 21 +++++++++++++++++++++
> >  fs/overlayfs/ovl_entry.h | 20 ++++++++++++++++++--
> >  fs/overlayfs/params.c    | 33 +++++++++++++++++++++++++++++----
> >  fs/overlayfs/super.c     |  2 +-
> >  4 files changed, 69 insertions(+), 7 deletions(-)
> >
> > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 
> > a5ef2005a2cc..b6f021ad7a43 100644
> > --- a/fs/overlayfs/copy_up.c
> > +++ b/fs/overlayfs/copy_up.c
> > @@ -243,6 +243,21 @@ static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
> >         return 0;
> >  }
> >
> > +static int ovl_copy_up_sync(struct path *path) {
> > +       struct file *new_file;
> > +       int err;
> > +
> > +       new_file = ovl_path_open(path, O_LARGEFILE | O_WRONLY);
> 
> I don't think any of those O_ flags are needed for fsync.
> Can a directory be opened O_WRONLY???

This function may be called with file or directory, shall I need to use different
flags? Such as below:

static int ovl_copy_up_sync(struct path *path, bool is_dir)
{
	struct file *new_file;
	int flags;
        int err;

        flags = is_dir ? (O_RDONLY | O_DIRECTORY) : (O_LARGEFILE | O_WRONLY);
	new_file = ovl_path_open(path, flags);
	if (IS_ERR(new_file))
		return PTR_ERR(new_file);

	err = vfs_fsync(new_file, 0);
	fput(new_file);

	return err;
}

> 
> > +       if (IS_ERR(new_file))
> > +               return PTR_ERR(new_file);
> > +
> > +       err = vfs_fsync(new_file, 0);
> > +       fput(new_file);
> > +
> > +       return err;
> > +}
> > +
> >  static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
> >                             struct file *new_file, loff_t len)  { @@ 
> > -701,6 +716,9 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
> >                 err = ovl_set_attr(ofs, temp, &c->stat);
> >         inode_unlock(temp->d_inode);
> >
> > +       if (!err && ovl_should_sync_strict(ofs))
> > +               err = ovl_copy_up_sync(&upperpath);
> > +
> >         return err;
> >  }
> >
> > @@ -1104,6 +1122,9 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
> >         ovl_clear_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
> >         ovl_clear_flag(OVL_VERIFIED_DIGEST, d_inode(c->dentry));
> >         ovl_set_upperdata(d_inode(c->dentry));
> > +
> > +       if (!err && ovl_should_sync_strict(ofs))
> > +               err = ovl_copy_up_sync(&upperpath);
> 
> fsync was probably already called in ovl_copy_up_file() making this call redundant and fsync of the removal > of metacopy xattr does not add any safety.

My original idea was that ovl_should_sync and ovl_should_sync_strict should not be enabled at the same time. The reasons are as follows:
If bothe ovl_should_sync and ovl_should_sync_strict return ture for "fsync=strict",
and power cut between ovl_copy_up_file and ovl_copy_up_metadata for file copy-up,
seems this file may also lost permission? 

> 
> >  out_free:
> >         kfree(capability);
> >  out:
> > diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 
> > cb449ab310a7..4592e6f7dcf7 100644
> > --- a/fs/overlayfs/ovl_entry.h
> > +++ b/fs/overlayfs/ovl_entry.h
> > @@ -5,6 +5,12 @@
> >   * Copyright (C) 2016 Red Hat, Inc.
> >   */
> >
> > +enum {
> > +       OVL_SYNC_DATA,
> > +       OVL_SYNC_STRICT,
> > +       OVL_SYNC_OFF,
> > +};
> > +
> >  struct ovl_config {
> >         char *upperdir;
> >         char *workdir;
> > @@ -18,7 +24,7 @@ struct ovl_config {
> >         int xino;
> >         bool metacopy;
> >         bool userxattr;
> > -       bool ovl_volatile;
> > +       int sync_mode;
> >  };
> >
> >  struct ovl_sb {
> > @@ -120,7 +126,17 @@ static inline struct ovl_fs *OVL_FS(struct 
> > super_block *sb)
> >
> >  static inline bool ovl_should_sync(struct ovl_fs *ofs)  {
> > -       return !ofs->config.ovl_volatile;
> > +       return ofs->config.sync_mode == OVL_SYNC_DATA;
> 
>     return ofs->config.sync_mode != OVL_SYNC_OFF; or
>     return ofs->config.sync_mode != OVL_FSYNC_VOLATILE;

There are risks if ovl_should_sync and ovl_should_sync_strict enabled at the same time.
The reasons are above.

> 
> > +}
> > +
> > +static inline bool ovl_should_sync_strict(struct ovl_fs *ofs) {
> > +       return ofs->config.sync_mode == OVL_SYNC_STRICT; }
> > +
> > +static inline bool ovl_is_volatile(struct ovl_config *config) {
> > +       return config->sync_mode == OVL_SYNC_OFF;
> >  }
> >
> >  static inline unsigned int ovl_numlower(struct ovl_entry *oe) diff 
> > --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 
> > 4860fcc4611b..5d5538dd3de7 100644
> > --- a/fs/overlayfs/params.c
> > +++ b/fs/overlayfs/params.c
> > @@ -58,6 +58,7 @@ enum ovl_opt {
> >         Opt_xino,
> >         Opt_metacopy,
> >         Opt_verity,
> > +       Opt_upsync,
> >         Opt_volatile,
> >  };
> >
> > @@ -139,6 +140,23 @@ static int ovl_verity_mode_def(void)
> >         return OVL_VERITY_OFF;
> >  }
> >
> > +static const struct constant_table ovl_parameter_upsync[] = {
> > +       { "data",       OVL_SYNC_DATA      },
> > +       { "strict",     OVL_SYNC_STRICT    },
> > +       { "off",        OVL_SYNC_OFF       },
> > +       {}
> > +};
> > +
> > +static const char *ovl_upsync_mode(struct ovl_config *config) {
> > +       return ovl_parameter_upsync[config->sync_mode].name;
> > +}
> > +
> > +static int ovl_upsync_mode_def(void)
> > +{
> > +       return OVL_SYNC_DATA;
> > +}
> > +
> >  const struct fs_parameter_spec ovl_parameter_spec[] = {
> >         fsparam_string_empty("lowerdir",    Opt_lowerdir),
> >         fsparam_string("lowerdir+",         Opt_lowerdir_add),
> > @@ -154,6 +172,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = {
> >         fsparam_enum("xino",                Opt_xino, ovl_parameter_xino),
> >         fsparam_enum("metacopy",            Opt_metacopy, ovl_parameter_bool),
> >         fsparam_enum("verity",              Opt_verity, ovl_parameter_verity),
> > +       fsparam_enum("upsync",              Opt_upsync, ovl_parameter_upsync),
> >         fsparam_flag("volatile",            Opt_volatile),
> >         {}
> >  };
> > @@ -617,8 +636,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
> >         case Opt_verity:
> >                 config->verity_mode = result.uint_32;
> >                 break;
> > +       case Opt_upsync:
> > +               config->sync_mode = result.uint_32;
> > +               break;
> >         case Opt_volatile:
> > -               config->ovl_volatile = true;
> > +               config->sync_mode = OVL_SYNC_OFF;
> >                 break;
> >         case Opt_userxattr:
> >                 config->userxattr = true; @@ -802,9 +824,9 @@ int 
> > ovl_fs_params_verify(const struct ovl_fs_context *ctx,
> >                 config->index = false;
> >         }
> >
> > -       if (!config->upperdir && config->ovl_volatile) {
> > +       if (!config->upperdir && ovl_is_volatile(config)) {
> >                 pr_info("option \"volatile\" is meaningless in a 
> > non-upper mount, ignoring it.\n");
> 
> This message would be confusing if mount option is "syncup=off"
> but if the option is "fsync=volatile" I think the message can stay as it is.
> 
> Thanks,
> Amir.

Yes. That sounds good! 
We thought this place was a little weird, too...

> 
> > -               config->ovl_volatile = false;
> > +               config->sync_mode = ovl_upsync_mode_def();
> >         }
> >
> >         if (!config->upperdir && config->uuid == OVL_UUID_ON) { @@ 
> > -997,8 +1019,11 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
> >         if (ofs->config.metacopy != ovl_metacopy_def)
> >                 seq_printf(m, ",metacopy=%s",
> >                            ofs->config.metacopy ? "on" : "off");
> > -       if (ofs->config.ovl_volatile)
> > +       if (ovl_is_volatile(&ofs->config))
> >                 seq_puts(m, ",volatile");
> > +       else if (ofs->config.sync_mode != ovl_upsync_mode_def())
> > +               seq_printf(m, ",upsync=%s",
> > +                          ovl_upsync_mode(&ofs->config));
> >         if (ofs->config.userxattr)
> >                 seq_puts(m, ",userxattr");
> >         if (ofs->config.verity_mode != ovl_verity_mode_def()) diff 
> > --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 
> > 06a231970cb5..824cbcf40523 100644
> > --- a/fs/overlayfs/super.c
> > +++ b/fs/overlayfs/super.c
> > @@ -750,7 +750,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
> >          * For volatile mount, create a incompat/volatile/dirty file to keep
> >          * track of it.
> >          */
> > -       if (ofs->config.ovl_volatile) {
> > +       if (ovl_is_volatile(&ofs->config)) {
> >                 err = ovl_create_volatile_dirty(ofs);
> >                 if (err < 0) {
> >                         pr_err("Failed to create volatile/dirty 
> > file.\n");
> >
> > base-commit: 0c3836482481200ead7b416ca80c68a29cfdaabd
> > --
> > 2.45.2
> >

Thanks,
Fei

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict"
  2024-07-19  9:55   ` 答复: " Lv Fei（吕飞）
@ 2024-07-20  7:29     ` Amir Goldstein
  2024-07-22  7:37       ` 答复: " Lv Fei（吕飞）
  0 siblings, 1 reply; 5+ messages in thread
From: Amir Goldstein @ 2024-07-20  7:29 UTC (permalink / raw)
  To: Lv Fei（吕飞）
  Cc: miklos@szeredi.hu, linux-unionfs@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Xu Lianghu（徐良虎）

On Fri, Jul 19, 2024 at 12:55 PM Lv Fei（吕飞） <feilv@asrmicro.com> wrote:
>
>
> >
> > -----邮件原件-----
> > 发件人: Amir Goldstein [mailto:amir73il@gmail.com]
> > 发送时间: 2024年7月19日 15:24
> > 收件人: Lv Fei（吕飞） <feilv@asrmicro.com>
> > 抄送: miklos@szeredi.hu; linux-unionfs@vger.kernel.org; linux-kernel@vger.kernel.org; Xu Lianghu（徐良虎） > <lianghuxu@asrmicro.com>
> > 主题: Re: [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict"
> >
> > On Thu, Jul 18, 2024 at 6:43 AM Fei Lv <feilv@asrmicro.com> wrote:
> > >
> > > If a directory only exist in low layer, create a new file in it
> > > trigger directory copy-up. Permission lost of the new directory in
> > > upper layer was observed during power-cut stress test.
> >
> > You should specify that this outcome happens on very specific upper fs (i.e. ubifs) which does not enforce ordering on storing of metadata changes.
>
> OK.
>
> >
> > >
> > > Fix by adding new mount opion "upsync=strict", make sure data/metadata
> > > of copied up directory written to disk before renaming from tmp to
> > > final destination.
> > >
> > > Signed-off-by: Fei Lv <feilv@asrmicro.com>
> > > ---
> > > OPT_sync changed to OPT_upsync since mount option "sync" already used.
> >
> > I see. I don't like the name "upsync" so much, it has other meanings how about using the option "fsync"?
>
> OK.
>
> >
> > Here is a suggested documentation (which should be accompanied to any patch)
>
> OK.
>
> >
> > diff --git a/Documentation/filesystems/overlayfs.rst
> > b/Documentation/filesystems/overlayfs.rst
> > index 165514401441..f8183ddf8c4d 100644
> > --- a/Documentation/filesystems/overlayfs.rst
> > +++ b/Documentation/filesystems/overlayfs.rst
> > @@ -742,6 +742,42 @@ controlled by the "uuid" mount option, which supports these values:
> >      mounted with "uuid=on".
> >
> >
> > +Durability and copy up
> > +----------------------
> > +
> > +The fsync(2) and fdatasync(2) system calls ensure that the metadata and
> > +data of a file, respectively, are safely written to the backing
> > +storage, which is expected to guarantee the existence of the information post system crash.
> > +
> > +Without the fdatasync(2) call, there is no guarantee that the observed
> > +data after a system crash will be either the old or the new data, but
> > +in practice, the observed data after crash is often the old or new data or a mix of both.
> > +
> > +When overlayfs file is modified for the first time, copy up will create
> > +a copy of the lower file and its parent directories in the upper layer.
> > +In case of a system crash, if fdatasync(2) was not called after the
> > +modification, the upper file could end up with no data at all (i.e.
> > +zeros), which would be an unusual outcome.  To avoid this experience,
> > +overlayfs calls fsync(2) on the upper file before completing the copy up with rename(2) to make the copy > up "atomic".
> > +
> > +Depending on the backing filesystem (e.g. ubifs), fsync(2) before
> > +rename(2) may not be enough to provide the "atomic" copy up behavior
> > +and fsync(2) on the copied up parent directories is required as well.
> > +
> > +Overlayfs can be tuned to prefer performance or durability when storing
> > +to the underlying upper layer.  This is controlled by the "fsync" mount
> > +option, which supports these values:
> > +
> > +- "ordered": (default)
> > +    Call fsync(2) on upper file before completion of copy up.
> > +- "strict":
> > +    Call fsync(2) on upper file and directories before completion of copy up.
> > +- "volatile": [*]
> > +    Prefer performance over durability (see `Volatile mount`_)
> > +
> > +[*] The mount option "volatile" is an alias to "fsync=volatile".
> > +
> > +
> >  Volatile mount
> >  --------------
> >
> > >
> > >  fs/overlayfs/copy_up.c   | 21 +++++++++++++++++++++
> > >  fs/overlayfs/ovl_entry.h | 20 ++++++++++++++++++--
> > >  fs/overlayfs/params.c    | 33 +++++++++++++++++++++++++++++----
> > >  fs/overlayfs/super.c     |  2 +-
> > >  4 files changed, 69 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index
> > > a5ef2005a2cc..b6f021ad7a43 100644
> > > --- a/fs/overlayfs/copy_up.c
> > > +++ b/fs/overlayfs/copy_up.c
> > > @@ -243,6 +243,21 @@ static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
> > >         return 0;
> > >  }
> > >
> > > +static int ovl_copy_up_sync(struct path *path) {
> > > +       struct file *new_file;
> > > +       int err;
> > > +
> > > +       new_file = ovl_path_open(path, O_LARGEFILE | O_WRONLY);
> >
> > I don't think any of those O_ flags are needed for fsync.
> > Can a directory be opened O_WRONLY???
>
> This function may be called with file or directory, shall I need to use different
> flags? Such as below:
>
> static int ovl_copy_up_sync(struct path *path, bool is_dir)
> {
>         struct file *new_file;
>         int flags;
>         int err;
>
>         flags = is_dir ? (O_RDONLY | O_DIRECTORY) : (O_LARGEFILE | O_WRONLY);
>         new_file = ovl_path_open(path, flags);
>         if (IS_ERR(new_file))
>                 return PTR_ERR(new_file);
>
>         err = vfs_fsync(new_file, 0);
 >         fput(new_file);
>
>         return err;
> }
>

You do not need O_WRONLY nor O_LARGEFILE for fsync of a regular file
just use O_RDONLY unconditionally.

> >
> > > +       if (IS_ERR(new_file))
> > > +               return PTR_ERR(new_file);
> > > +
> > > +       err = vfs_fsync(new_file, 0);
> > > +       fput(new_file);
> > > +
> > > +       return err;
> > > +}
> > > +
> > >  static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
> > >                             struct file *new_file, loff_t len)  { @@
> > > -701,6 +716,9 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
> > >                 err = ovl_set_attr(ofs, temp, &c->stat);
> > >         inode_unlock(temp->d_inode);
> > >
> > > +       if (!err && ovl_should_sync_strict(ofs))
> > > +               err = ovl_copy_up_sync(&upperpath);
> > > +
> > >         return err;
> > >  }
> > >
> > > @@ -1104,6 +1122,9 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
> > >         ovl_clear_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
> > >         ovl_clear_flag(OVL_VERIFIED_DIGEST, d_inode(c->dentry));
> > >         ovl_set_upperdata(d_inode(c->dentry));
> > > +
> > > +       if (!err && ovl_should_sync_strict(ofs))
> > > +               err = ovl_copy_up_sync(&upperpath);
> >
> > fsync was probably already called in ovl_copy_up_file() making this call redundant and fsync of the removal > of metacopy xattr does not add any safety.
>
> My original idea was that ovl_should_sync and ovl_should_sync_strict should not be enabled at the same time.

You have it wrong.

The ovl_should_sync() helper does not mean sync_mode==ordered,
it means sync_mode!=volatile
It literally means "should overlayfs respect fsync"
and it is used in several places in the code.

So ovl_should_sync_strict() always implies ovl_should_sync().

> The reasons are as follows:
> If bothe ovl_should_sync and ovl_should_sync_strict return ture for "fsync=strict",
> and power cut between ovl_copy_up_file and ovl_copy_up_metadata for file copy-up,
> seems this file may also lost permission?

fsync of file in ovl_copy_up_file() the file is either an O_TMPFILE
or in the workdir. no risk involved even with ubifs.

>
> >
> > >  out_free:
> > >         kfree(capability);
> > >  out:
> > > diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index
> > > cb449ab310a7..4592e6f7dcf7 100644
> > > --- a/fs/overlayfs/ovl_entry.h
> > > +++ b/fs/overlayfs/ovl_entry.h
> > > @@ -5,6 +5,12 @@
> > >   * Copyright (C) 2016 Red Hat, Inc.
> > >   */
> > >
> > > +enum {
> > > +       OVL_SYNC_DATA,
> > > +       OVL_SYNC_STRICT,
> > > +       OVL_SYNC_OFF,
> > > +};
> > > +
> > >  struct ovl_config {
> > >         char *upperdir;
> > >         char *workdir;
> > > @@ -18,7 +24,7 @@ struct ovl_config {
> > >         int xino;
> > >         bool metacopy;
> > >         bool userxattr;
> > > -       bool ovl_volatile;
> > > +       int sync_mode;
> > >  };
> > >
> > >  struct ovl_sb {
> > > @@ -120,7 +126,17 @@ static inline struct ovl_fs *OVL_FS(struct
> > > super_block *sb)
> > >
> > >  static inline bool ovl_should_sync(struct ovl_fs *ofs)  {
> > > -       return !ofs->config.ovl_volatile;
> > > +       return ofs->config.sync_mode == OVL_SYNC_DATA;
> >
> >     return ofs->config.sync_mode != OVL_SYNC_OFF; or
> >     return ofs->config.sync_mode != OVL_FSYNC_VOLATILE;
>
> There are risks if ovl_should_sync and ovl_should_sync_strict enabled at the same time.
> The reasons are above.
>

No. see above.

Let me know if I misunderstood your concern.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* 答复: [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict"
  2024-07-20  7:29     ` Amir Goldstein
@ 2024-07-22  7:37       ` Lv Fei（吕飞）
  0 siblings, 0 replies; 5+ messages in thread
From: Lv Fei（吕飞） @ 2024-07-22  7:37 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: miklos@szeredi.hu, linux-unionfs@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Xu Lianghu（徐良虎）



> -----邮件原件-----
> 发件人: Amir Goldstein [mailto:amir73il@gmail.com] 
> 发送时间: 2024年7月20日 15:30
> 收件人: Lv Fei（吕飞） <feilv@asrmicro.com>
> 抄送: miklos@szeredi.hu; linux-unionfs@vger.kernel.org; linux-kernel@vger.kernel.org; Xu Lianghu（徐良虎） <lianghuxu@asrmicro.com>
> 主题: Re: [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict"
> 
> On Fri, Jul 19, 2024 at 12:55 PM Lv Fei（吕飞） <feilv@asrmicro.com> wrote:
> >
> >
> > >
> > > -----邮件原件-----
> > > 发件人: Amir Goldstein [mailto:amir73il@gmail.com]
> > > 发送时间: 2024年7月19日 15:24
> > > 收件人: Lv Fei（吕飞） <feilv@asrmicro.com>
> > > 抄送: miklos@szeredi.hu; linux-unionfs@vger.kernel.org; 
> > > linux-kernel@vger.kernel.org; Xu Lianghu（徐良虎） > 
> > > <lianghuxu@asrmicro.com>
> > > 主题: Re: [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict"
> > >
> > > On Thu, Jul 18, 2024 at 6:43 AM Fei Lv <feilv@asrmicro.com> wrote:
> > > >
> > > > If a directory only exist in low layer, create a new file in it 
> > > > trigger directory copy-up. Permission lost of the new directory in 
> > > > upper layer was observed during power-cut stress test.
> > >
> > > You should specify that this outcome happens on very specific upper fs (i.e. ubifs) which does not enforce ordering on storing of metadata changes.
> >
> > OK.
> >
> > >
> > > >
> > > > Fix by adding new mount opion "upsync=strict", make sure 
> > > > data/metadata of copied up directory written to disk before 
> > > > renaming from tmp to final destination.
> > > >
> > > > Signed-off-by: Fei Lv <feilv@asrmicro.com>
> > > > ---
> > > > OPT_sync changed to OPT_upsync since mount option "sync" already used.
> > >
> > > I see. I don't like the name "upsync" so much, it has other meanings how about using the option "fsync"?
> >
> > OK.
> >
> > >
> > > Here is a suggested documentation (which should be accompanied to 
> > > any patch)
> >
> > OK.
> >
> > >
> > > diff --git a/Documentation/filesystems/overlayfs.rst
> > > b/Documentation/filesystems/overlayfs.rst
> > > index 165514401441..f8183ddf8c4d 100644
> > > --- a/Documentation/filesystems/overlayfs.rst
> > > +++ b/Documentation/filesystems/overlayfs.rst
> > > @@ -742,6 +742,42 @@ controlled by the "uuid" mount option, which supports these values:
> > >      mounted with "uuid=on".
> > >
> > >
> > > +Durability and copy up
> > > +----------------------
> > > +
> > > +The fsync(2) and fdatasync(2) system calls ensure that the metadata 
> > > +and data of a file, respectively, are safely written to the backing 
> > > +storage, which is expected to guarantee the existence of the information post system crash.
> > > +
> > > +Without the fdatasync(2) call, there is no guarantee that the 
> > > +observed data after a system crash will be either the old or the 
> > > +new data, but in practice, the observed data after crash is often the old or new data or a mix of both.
> > > +
> > > +When overlayfs file is modified for the first time, copy up will 
> > > +create a copy of the lower file and its parent directories in the upper layer.
> > > +In case of a system crash, if fdatasync(2) was not called after the 
> > > +modification, the upper file could end up with no data at all (i.e.
> > > +zeros), which would be an unusual outcome.  To avoid this 
> > > +experience, overlayfs calls fsync(2) on the upper file before completing the copy up with rename(2) to make the copy > up "atomic".
> > > +
> > > +Depending on the backing filesystem (e.g. ubifs), fsync(2) before
> > > +rename(2) may not be enough to provide the "atomic" copy up 
> > > +behavior and fsync(2) on the copied up parent directories is required as well.
> > > +
> > > +Overlayfs can be tuned to prefer performance or durability when 
> > > +storing to the underlying upper layer.  This is controlled by the 
> > > +"fsync" mount option, which supports these values:
> > > +
> > > +- "ordered": (default)
> > > +    Call fsync(2) on upper file before completion of copy up.
> > > +- "strict":
> > > +    Call fsync(2) on upper file and directories before completion of copy up.
> > > +- "volatile": [*]
> > > +    Prefer performance over durability (see `Volatile mount`_)
> > > +
> > > +[*] The mount option "volatile" is an alias to "fsync=volatile".
> > > +
> > > +
> > >  Volatile mount
> > >  --------------
> > >
> > > >
> > > >  fs/overlayfs/copy_up.c   | 21 +++++++++++++++++++++
> > > >  fs/overlayfs/ovl_entry.h | 20 ++++++++++++++++++--
> > > >  fs/overlayfs/params.c    | 33 +++++++++++++++++++++++++++++----
> > > >  fs/overlayfs/super.c     |  2 +-
> > > >  4 files changed, 69 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index
> > > > a5ef2005a2cc..b6f021ad7a43 100644
> > > > --- a/fs/overlayfs/copy_up.c
> > > > +++ b/fs/overlayfs/copy_up.c
> > > > @@ -243,6 +243,21 @@ static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
> > > >         return 0;
> > > >  }
> > > >
> > > > +static int ovl_copy_up_sync(struct path *path) {
> > > > +       struct file *new_file;
> > > > +       int err;
> > > > +
> > > > +       new_file = ovl_path_open(path, O_LARGEFILE | O_WRONLY);
> > >
> > > I don't think any of those O_ flags are needed for fsync.
> > > Can a directory be opened O_WRONLY???
> >
> > This function may be called with file or directory, shall I need to 
> > use different flags? Such as below:
> >
> > static int ovl_copy_up_sync(struct path *path, bool is_dir) {
> >         struct file *new_file;
> >         int flags;
> >         int err;
> >
> >         flags = is_dir ? (O_RDONLY | O_DIRECTORY) : (O_LARGEFILE | O_WRONLY);
> >         new_file = ovl_path_open(path, flags);
> >         if (IS_ERR(new_file))
> >                 return PTR_ERR(new_file);
> >
> >         err = vfs_fsync(new_file, 0);
>  >         fput(new_file);
> >
> >         return err;
> > }
> >
> 
> You do not need O_WRONLY nor O_LARGEFILE for fsync of a regular file just use O_RDONLY unconditionally.

OK.

> 
> > >
> > > > +       if (IS_ERR(new_file))
> > > > +               return PTR_ERR(new_file);
> > > > +
> > > > +       err = vfs_fsync(new_file, 0);
> > > > +       fput(new_file);
> > > > +
> > > > +       return err;
> > > > +}
> > > > +
> > > >  static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
> > > >                             struct file *new_file, loff_t len)  { 
> > > > @@
> > > > -701,6 +716,9 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
> > > >                 err = ovl_set_attr(ofs, temp, &c->stat);
> > > >         inode_unlock(temp->d_inode);
> > > >
> > > > +       if (!err && ovl_should_sync_strict(ofs))
> > > > +               err = ovl_copy_up_sync(&upperpath);
> > > > +
> > > >         return err;
> > > >  }
> > > >
> > > > @@ -1104,6 +1122,9 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
> > > >         ovl_clear_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
> > > >         ovl_clear_flag(OVL_VERIFIED_DIGEST, d_inode(c->dentry));
> > > >         ovl_set_upperdata(d_inode(c->dentry));
> > > > +
> > > > +       if (!err && ovl_should_sync_strict(ofs))
> > > > +               err = ovl_copy_up_sync(&upperpath);
> > >
> > > fsync was probably already called in ovl_copy_up_file() making this call redundant and fsync of the removal > of metacopy xattr does not add any safety.
> >
> > My original idea was that ovl_should_sync and ovl_should_sync_strict should not be enabled at the same time.
> 
> You have it wrong.
> 
> The ovl_should_sync() helper does not mean sync_mode==ordered, it means sync_mode!=volatile It literally means "should overlayfs respect fsync"
> and it is used in several places in the code.
> 
> So ovl_should_sync_strict() always implies ovl_should_sync().
> 
> > The reasons are as follows:
> > If bothe ovl_should_sync and ovl_should_sync_strict return ture for 
> > "fsync=strict", and power cut between ovl_copy_up_file and 
> > ovl_copy_up_metadata for file copy-up, seems this file may also lost permission?
> 
> fsync of file in ovl_copy_up_file() the file is either an O_TMPFILE or in the workdir. no risk involved even with ubifs.

Understand, changes not actually updated to destination file at this time.

> 
> >
> > >
> > > >  out_free:
> > > >         kfree(capability);
> > > >  out:
> > > > diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h 
> > > > index
> > > > cb449ab310a7..4592e6f7dcf7 100644
> > > > --- a/fs/overlayfs/ovl_entry.h
> > > > +++ b/fs/overlayfs/ovl_entry.h
> > > > @@ -5,6 +5,12 @@
> > > >   * Copyright (C) 2016 Red Hat, Inc.
> > > >   */
> > > >
> > > > +enum {
> > > > +       OVL_SYNC_DATA,
> > > > +       OVL_SYNC_STRICT,
> > > > +       OVL_SYNC_OFF,
> > > > +};
> > > > +
> > > >  struct ovl_config {
> > > >         char *upperdir;
> > > >         char *workdir;
> > > > @@ -18,7 +24,7 @@ struct ovl_config {
> > > >         int xino;
> > > >         bool metacopy;
> > > >         bool userxattr;
> > > > -       bool ovl_volatile;
> > > > +       int sync_mode;
> > > >  };
> > > >
> > > >  struct ovl_sb {
> > > > @@ -120,7 +126,17 @@ static inline struct ovl_fs *OVL_FS(struct 
> > > > super_block *sb)
> > > >
> > > >  static inline bool ovl_should_sync(struct ovl_fs *ofs)  {
> > > > -       return !ofs->config.ovl_volatile;
> > > > +       return ofs->config.sync_mode == OVL_SYNC_DATA;
> > >
> > >     return ofs->config.sync_mode != OVL_SYNC_OFF; or
> > >     return ofs->config.sync_mode != OVL_FSYNC_VOLATILE;
> >
> > There are risks if ovl_should_sync and ovl_should_sync_strict enabled at the same time.
> > The reasons are above.
> >
> 
> No. see above.
> 
> Let me know if I misunderstood your concern.
> 
> Thanks,
> Amir.

I will post patch V2 later.

Thanks,
Fei

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-07-22  7:37 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-18  3:43 [PATCH] ovl: fsync after metadata copy-up via mount option "upsync=strict" Fei Lv
2024-07-19  7:23 ` Amir Goldstein
2024-07-19  9:55   ` 答复: " Lv Fei（吕飞）
2024-07-20  7:29     ` Amir Goldstein
2024-07-22  7:37       ` 答复: " Lv Fei（吕飞）

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox